Home | History | Annotate | Download | only in test
      1 # IMPORTANT: the same tests are run from "test_xml_etree_c" in order
      2 # to ensure consistency between the C implementation and the Python
      3 # implementation.
      4 #
      5 # For this purpose, the module-level "ET" symbol is temporarily
      6 # monkey-patched when running the "test_xml_etree_c" test suite.
      7 
      8 import copy
      9 import functools
     10 import html
     11 import io
     12 import operator
     13 import pickle
     14 import sys
     15 import types
     16 import unittest
     17 import warnings
     18 import weakref
     19 
     20 from itertools import product
     21 from test import support
     22 from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
     23 
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
#
# Both names start out as None placeholders; presumably they are bound to
# real modules by setup code outside this excerpt -- TODO confirm.
pyET = None
ET = None

# Paths of the XML fixture files, looked up in the "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Probe only: the encoded result is discarded; what matters is whether
    # the resolved path can be represented as UTF-8 at all.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Raised while the module is being imported, so nothing below runs.
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
     38 
# Small un-namespaced document: a <body> with two <tag> children and a
# <section> that itself contains one <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> with a <tag>, a <nexttag> and a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same element layout as SAMPLE_XML (minus attributes) but placed in a
# default namespace.  Note the literal starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling <table> elements bound to different prefixed namespaces
# ("hello" via h:, "foo" via f:) under an un-namespaced <root>.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DOCTYPE pulls in a parameter entity from an external file
# and whose body references &entity;, which the internal subset does not
# declare.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Document declaring &entity; as an external (SYSTEM) entity that points at
# a file:// URL named "non-existing-file.xml".
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""
    100 
    101 def checkwarnings(*filters, quiet=False):
    102     def decorator(test):
    103         def newtest(*args, **kwargs):
    104             with support.check_warnings(*filters, quiet=quiet):
    105                 test(*args, **kwargs)
    106         functools.update_wrapper(newtest, test)
    107         return newtest
    108     return decorator
    109 
    110 
    111 class ModuleTest(unittest.TestCase):
    112     def test_sanity(self):
    113         # Import sanity.
    114 
    115         from xml.etree import ElementTree
    116         from xml.etree import ElementInclude
    117         from xml.etree import ElementPath
    118 
    119     def test_all(self):
    120         names = ("xml.etree.ElementTree", "_elementtree")
    121         support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",))
    122 
    123 
    124 def serialize(elem, to_string=True, encoding='unicode', **options):
    125     if encoding != 'unicode':
    126         file = io.BytesIO()
    127     else:
    128         file = io.StringIO()
    129     tree = ET.ElementTree(elem)
    130     tree.write(file, encoding=encoding, **options)
    131     if to_string:
    132         return file.getvalue()
    133     else:
    134         file.seek(0)
    135         return file
    136 
    137 def summarize_list(seq):
    138     return [elem.tag for elem in seq]
    139 
    140 
    141 class ElementTestCase:
    142     @classmethod
    143     def setUpClass(cls):
    144         cls.modules = {pyET, ET}
    145 
    146     def pickleRoundTrip(self, obj, name, dumper, loader, proto):
    147         save_m = sys.modules[name]
    148         try:
    149             sys.modules[name] = dumper
    150             temp = pickle.dumps(obj, proto)
    151             sys.modules[name] = loader
    152             result = pickle.loads(temp)
    153         except pickle.PicklingError as pe:
    154             # pyET must be second, because pyET may be (equal to) ET.
    155             human = dict([(ET, "cET"), (pyET, "pyET")])
    156             raise support.TestFailed("Failed to round-trip %r from %r to %r"
    157                                      % (obj,
    158                                         human.get(dumper, dumper),
    159                                         human.get(loader, loader))) from pe
    160         finally:
    161             sys.modules[name] = save_m
    162         return result
    163 
    164     def assertEqualElements(self, alice, bob):
    165         self.assertIsInstance(alice, (ET.Element, pyET.Element))
    166         self.assertIsInstance(bob, (ET.Element, pyET.Element))
    167         self.assertEqual(len(list(alice)), len(list(bob)))
    168         for x, y in zip(alice, bob):
    169             self.assertEqualElements(x, y)
    170         properties = operator.attrgetter('tag', 'tail', 'text', 'attrib')
    171         self.assertEqual(properties(alice), properties(bob))
    172 
    173 # --------------------------------------------------------------------
    174 # element tree tests
    175 
    176 class ElementTreeTest(unittest.TestCase):
    177 
    def serialize_check(self, elem, expected):
        # Serialize elem with the module-level serialize() helper (default
        # options) and compare the result with the expected string.
        actual = serialize(elem)
        self.assertEqual(actual, expected)
    180 
    def test_interface(self):
        # Test element tree interface.

        def check_string(text):
            # Exercise len(), per-character iteration, concatenation and
            # slicing; only the per-character length is asserted.
            len(text)
            for ch in text:
                self.assertEqual(len(ch), 1,
                        msg="expected one-character string, got %r" % ch)
            text + ""
            text + " "
            text[:0]

        def check_mapping(table):
            # Exercise len(), keys(), items(), lookup and item assignment.
            len(table)
            key_view = table.keys()
            table.items()
            for name in key_view:
                table[name]
            table["key"] = "value"
            self.assertEqual(table["key"], "value",
                    msg="expected value string, got %r" % table["key"])

        def check_element(node):
            # Recursively validate an element and all of its children.
            self.assertTrue(ET.iselement(node), msg="not an element")
            visible = dir(node)
            for attr in 'tag', 'attrib', 'text', 'tail':
                self.assertTrue(hasattr(node, attr),
                        msg='no %s member' % attr)
                self.assertIn(attr, visible,
                        msg='no %s visible by dir' % attr)

            check_string(node.tag)
            check_mapping(node.attrib)
            if node.text is not None:
                check_string(node.text)
            if node.tail is not None:
                check_string(node.tail)
            for child in node:
                check_element(child)

        plain = ET.Element("tag")
        check_element(plain)
        check_element(ET.ElementTree(plain).getroot())

        accented = ET.Element("t\xe4g", key="value")
        tree = ET.ElementTree(accented)
        self.assertRegex(repr(accented), r"^<Element 't\xe4g' at 0x.*>$")
        element = ET.Element("tag", key="value")

        # Make sure all standard element methods exist.

        def check_method(method):
            self.assertTrue(hasattr(method, '__call__'),
                    msg="%s not callable" % method)

        method_names = (
            "append", "extend", "insert", "remove", "getchildren",
            "find", "iterfind", "findall", "findtext", "clear",
            "get", "set", "keys", "items", "iter", "itertext",
            "getiterator",
        )
        for method_name in method_names:
            check_method(getattr(element, method_name))

        # These methods return an iterable. See bug 6472.

        def check_iter(it):
            check_method(it.__next__)

        for owner in (element, tree):
            for pattern in ("tag", "*"):
                check_iter(owner.iterfind(pattern))

        # These aliases are provided:

        self.assertEqual(ET.XML, ET.fromstring)
        self.assertEqual(ET.PI, ET.ProcessingInstruction)
    268 
    def test_set_attribute(self):
        # Each of tag, text, tail and attrib must be assignable, and a
        # second assignment must replace the first.
        element = ET.Element('tag')

        self.assertEqual(element.tag, 'tag')
        for value in ('Tag', 'TAG'):
            element.tag = value
            self.assertEqual(element.tag, value)

        self.assertIsNone(element.text)
        for value in ('Text', 'TEXT'):
            element.text = value
            self.assertEqual(element.text, value)

        self.assertIsNone(element.tail)
        for value in ('Tail', 'TAIL'):
            element.tail = value
            self.assertEqual(element.tail, value)

        self.assertEqual(element.attrib, {})
        for expected in ({'a': 'b', 'c': 'd'}, {'A': 'B', 'C': 'D'}):
            # Assign a separate copy so the comparison is against an
            # independent dict, not the very object that was stored.
            element.attrib = dict(expected)
            self.assertEqual(element.attrib, expected)
    295 
    def test_simpleops(self):
        # Basic method sanity checks.

        body = ET.XML("<body><tag/></body>")
        self.serialize_check(body, '<body><tag /></body>')
        extra = ET.Element("tag2")
        body.append(extra)
        self.serialize_check(body, '<body><tag /><tag2 /></body>')
        body.remove(extra)
        self.serialize_check(body, '<body><tag /></body>')
        body.insert(0, extra)
        self.serialize_check(body, '<body><tag2 /><tag /></body>')
        body.remove(extra)
        body.extend([extra])
        self.serialize_check(body, '<body><tag /><tag2 /></body>')
        body.remove(extra)

        no_children = '<tag key="value" />'
        one_child = '<tag key="value"><subtag /></tag>'
        two_children = '<tag key="value"><subtag /><subtag /></tag>'

        parent = ET.Element("tag", key="value")
        self.serialize_check(parent, no_children) # 1
        child = ET.Element("subtag")
        parent.append(child)
        self.serialize_check(parent, one_child) # 2
        parent.insert(0, child)
        self.serialize_check(parent, two_children) # 3
        parent.remove(child)
        self.serialize_check(parent, one_child) # 4
        parent.remove(child)
        self.serialize_check(parent, no_children) # 5
        # Removing a child that is no longer present must fail like a list.
        with self.assertRaises(ValueError) as caught:
            parent.remove(child)
        self.assertEqual(str(caught.exception),
                         'list.remove(x): x not in list')
        self.serialize_check(parent, no_children) # 6

        # Slice assignment, slice reads (plain and extended) and deletion.
        parent[0:0] = [child] * 3
        self.serialize_check(parent[1], '<subtag />')
        self.assertEqual(parent[1:9], [parent[1], parent[2]])
        self.assertEqual(parent[:9:2], [parent[0], parent[2]])
        del parent[1:2]
        self.serialize_check(parent, two_children)
    336 
    def test_cdata(self):
        # Test CDATA handling (etc).
        # Plain text, character references and a CDATA section must all
        # serialize to the same output.
        sources = (
            "<tag>hello</tag>",
            "<tag>&#104;&#101;&#108;&#108;&#111;</tag>",
            "<tag><![CDATA[hello]]></tag>",
        )
        for source in sources:
            self.serialize_check(ET.XML(source), '<tag>hello</tag>')
    346 
    def test_file_init(self):
        # ElementTree(file=...) must accept a binary file object ...
        in_memory = io.BytesIO(SAMPLE_XML.encode("utf-8"))
        tree = ET.ElementTree(file=in_memory)
        for path in ("tag", "section/tag"):
            self.assertEqual(tree.find(path).tag, 'tag')

        # ... as well as a file name.
        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
        lookups = (
            ("element", 'element'),
            ("element/../empty-element", 'empty-element'),
        )
        for path, expected_tag in lookups:
            self.assertEqual(tree.find(path).tag, expected_tag)
    356                 'empty-element')
    357 
    def test_path_cache(self):
        # Check that the path cache behaves sanely.

        from xml.etree import ElementPath

        elem = ET.XML(SAMPLE_XML)

        def search(count):
            # Look up the paths './0' .. './<count-1>' on fresh trees.
            for i in range(count):
                ET.ElementTree(elem).find('./' + str(i))

        search(10)
        cache_len_10 = len(ElementPath._cache)
        # Repeating the same paths must not change the cache size.
        search(10)
        self.assertEqual(len(ElementPath._cache), cache_len_10)
        # New paths must be added to the cache ...
        search(20)
        self.assertGreater(len(ElementPath._cache), cache_len_10)
        # ... but its size must stay below 500 entries.
        search(600)
        self.assertLess(len(ElementPath._cache), 500)
    372 
    def test_copy(self):
        # Test copy handling (etc).

        import copy
        original = ET.XML("<tag>hello<foo/></tag>")
        shallow = copy.copy(original)
        deep = copy.deepcopy(original)
        # Rename the child through the original: the shallow copy is
        # expected to show the change, the deep copy is not.
        original.find("foo").tag = "bar"
        renamed = '<tag>hello<bar /></tag>'
        self.serialize_check(original, renamed)
        self.serialize_check(shallow, renamed)
        self.serialize_check(deep, '<tag>hello<foo /></tag>')
    384 
    def test_attrib(self):
        # Test attribute handling.

        def assert_key_value(node):
            # node must expose exactly one attribute: key="value".
            self.assertEqual(node.get("key"), 'value')
            self.assertEqual(node.attrib, {'key': 'value'})

        node = ET.Element("tag")
        node.get("key") # 1.1
        self.assertEqual(node.get("key", "default"), 'default') # 1.2

        node.set("key", "value")
        self.assertEqual(node.get("key"), 'value') # 1.3

        # 2.x: attribute given as a keyword argument.
        assert_key_value(ET.Element("tag", key="value"))

        # 3.x: attribute given as a dict; clearing the dict afterwards
        # checks for aliasing issues.
        source = {"key": "value"}
        node = ET.Element("tag", source)
        source.clear()
        assert_key_value(node)

        # 4.x: the same, with the dict unpacked into keyword arguments.
        source = {"key": "value"}
        node = ET.Element("tag", **source)
        source.clear()
        assert_key_value(node)

        # 5.x: a keyword argument wins over the same key in the dict.
        assert_key_value(ET.Element("tag", {"key": "other"}, key="value"))

        node = ET.Element('test')
        node.text = "aa"
        node.set('testa', 'testval')
        node.set('testb', 'test2')
        self.assertEqual(
            ET.tostring(node),
            b'<test testa="testval" testb="test2">aa</test>')
        self.assertEqual(sorted(node.keys()), ['testa', 'testb'])
        self.assertEqual(
            sorted(node.items()),
            [('testa', 'testval'), ('testb', 'test2')])
        self.assertEqual(node.attrib['testb'], 'test2')
        node.attrib['testb'] = 'test1'
        node.attrib['testc'] = 'test2'
        self.assertEqual(
            ET.tostring(node),
            b'<test testa="testval" testb="test1" testc="test2">aa</test>')

        # Whitespace characters inside attribute values.
        node = ET.Element('test')
        whitespace_values = (
            ('a', '\r'),
            ('b', '\r\n'),
            ('c', '\t\n\r '),
            ('d', '\n\n'),
        )
        for name, value in whitespace_values:
            node.set(name, value)
        self.assertEqual(
            ET.tostring(node),
            b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
    437 
    def test_makeelement(self):
        # Test makeelement handling.

        single = '<tag><subtag key="value" /></tag>'
        triple = ('<tag><subtag key="value" /><subtag key="value" />'
                  '<subtag key="value" /></tag>')

        parent = ET.Element("tag")
        attrib = {"key": "value"}
        child = parent.makeelement("subtag", attrib)
        self.assertIsNot(child.attrib, attrib, msg="attrib aliasing")
        parent.append(child)
        self.serialize_check(parent, single)

        parent.clear()
        self.serialize_check(parent, '<tag />')
        parent.append(child)
        self.serialize_check(parent, single)
        parent.extend([child, child])
        self.serialize_check(parent, triple)
        # Full-slice assignment from a list, then from a tuple.
        parent[:] = [child]
        self.serialize_check(parent, single)
        parent[:] = (child,)
        self.serialize_check(parent, single)
    458         self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
    459 
    def test_parsefile(self):
        # Test parsing from file.

        expected_plain = (
            '<root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>')
        expected_ns = (
            '<ns0:root xmlns:ns0="namespace">\n'
            '   <ns0:element key="value">text</ns0:element>\n'
            '   <ns0:element>text</ns0:element>tail\n'
            '   <ns0:empty-element />\n'
            '</ns0:root>')

        # ET.parse() followed by ElementTree.write() as text.
        for path, expected in ((SIMPLE_XMLFILE, expected_plain),
                               (SIMPLE_NS_XMLFILE, expected_ns)):
            tree = ET.parse(path)
            stream = io.StringIO()
            tree.write(stream, encoding='unicode')
            self.assertEqual(stream.getvalue(), expected)

        with open(SIMPLE_XMLFILE) as f:
            data = f.read()

        # Feed the same text to an XMLParser using its default target.
        parser = ET.XMLParser()
        self.assertRegex(parser.version, r'^Expat ')
        parser.feed(data)
        self.serialize_check(parser.close(), expected_plain)

        # And again with an explicitly supplied TreeBuilder target.
        target = ET.TreeBuilder()
        parser = ET.XMLParser(target=target)
        parser.feed(data)
        self.serialize_check(parser.close(), expected_plain)
    504 
    def test_parseliteral(self):
        # Parse documents from string literals and serialize them back.
        markup = "<html><body>text</body></html>"

        element = ET.XML(markup)
        self.assertEqual(ET.tostring(element, encoding='unicode'),
                         '<html><body>text</body></html>')
        element = ET.fromstring(markup)
        self.assertEqual(ET.tostring(element, encoding='unicode'),
                         '<html><body>text</body></html>')

        # The document may be split at arbitrary points across fragments.
        fragments = ["<html><body>", "text</bo", "dy></html>"]
        element = ET.fromstringlist(fragments)
        self.assertEqual(ET.tostring(element),
                         b'<html><body>text</body></html>')
        self.assertEqual(b"".join(ET.tostringlist(element)),
                         b'<html><body>text</body></html>')
        self.assertEqual(ET.tostring(element, "ascii"),
                         b"<?xml version='1.0' encoding='ascii'?>\n"
                         b"<html><body>text</body></html>")

        # XMLID returns the tree plus a mapping of id attribute -> element.
        _, ids = ET.XMLID("<html><body>text</body></html>")
        self.assertEqual(len(ids), 0)
        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
        self.assertEqual(len(ids), 1)
        self.assertEqual(ids["body"].tag, 'body')
    526 
    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        def tagged(pairs):
            # Drain an event stream into a list of (action, tag) tuples.
            return [(action, elem.tag) for action, elem in pairs]

        # Default events: only "end".
        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual(tagged(context), [
            ('end', 'element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])
        self.assertEqual(context.root.tag, 'root')

        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual(tagged(context), [
            ('end', '{namespace}element'),
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
        ])

        # An empty events tuple yields nothing, positionally ...
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual(tagged(context), [])

        # ... and as a keyword argument.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual(tagged(context), [])

        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual(tagged(context), [
            ('start', 'root'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'empty-element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])

        # Namespace events carry a (prefix, uri) tuple or None rather than
        # an element, so they are reported as-is.
        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        observed = [
            (action, elem.tag) if action in ("start", "end")
            else (action, elem)
            for action, elem in context
        ]
        self.assertEqual(observed, [
            ('start-ns', ('', 'namespace')),
            ('start', '{namespace}root'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}empty-element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
            ('end-ns', None),
        ])

        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        actions = [action for action, elem in context]
        self.assertEqual(actions, ['start-ns', 'end-ns'])

        # An unknown event name is rejected; a caller-supplied file object
        # must be left open.
        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as caught:
                iterparse(f, events)
            self.assertFalse(f.closed)
        self.assertEqual(str(caught.exception), "unknown event 'bogus'")

        # Same failure when iterparse is given a file name: this must not
        # produce a resource warning.
        with support.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as caught:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(caught.exception), "unknown event 'bogus'")
            del caught

        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
            ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
            ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
        ])

        # Trailing junk: the element is delivered first, then the error.
        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as caught:
            next(it)
        self.assertEqual(str(caught.exception),
                         'junk after document element: line 1, column 12')

        # Same document read from a real file, checking for resource
        # warnings once the iterator is dropped.
        self.addCleanup(support.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with support.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as caught:
                next(it)
            self.assertEqual(str(caught.exception),
                             'junk after document element: line 1, column 12')
            del caught, it
    637             del cm, it
    638 
    def test_writefile(self):
        # Serialize an element with text, then with a text-bearing child.
        root = ET.Element("tag")
        root.text = "text"
        self.serialize_check(root, '<tag>text</tag>')
        child = ET.SubElement(root, "subtag")
        child.text = "subtext"
        self.serialize_check(root, '<tag>text<subtag>subtext</subtag></tag>')

        # Test tag suppression
        root.tag = None
        self.serialize_check(root, 'text<subtag>subtext</subtag>')
        root.insert(0, ET.Comment("comment"))
        self.serialize_check(
            root,
            'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3

        # Replace the comment with a processing instruction.
        root[0] = ET.PI("key", "value")
        self.serialize_check(root, 'text<?key value?><subtag>subtext</subtag>')
    655 
    def test_custom_builder(self):
        # Test parser w. custom builder.

        class TagRecorder(list):
            # Target that records element start/end events only.
            def start(self, tag, attrib):
                self.append(("start", tag))
            def end(self, tag):
                self.append(("end", tag))
            def data(self, text):
                pass

        class FullRecorder(TagRecorder):
            # Additionally records processing instructions and comments.
            def pi(self, target, data):
                self.append(("pi", target, data))
            def comment(self, data):
                self.append(("comment", data))

        def record(path, recorder):
            # Feed the text of the file at path to a parser that reports
            # to recorder, and hand the recorder back.
            with open(path) as f:
                data = f.read()
            parser = ET.XMLParser(target=recorder)
            parser.feed(data)
            return recorder

        self.assertEqual(record(SIMPLE_XMLFILE, TagRecorder()), [
            ('start', 'root'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'empty-element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])

        self.assertEqual(record(SIMPLE_NS_XMLFILE, FullRecorder()), [
            ('pi', 'pi', 'data'),
            ('comment', ' comment '),
            ('start', '{namespace}root'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}empty-element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
        ])
    710 
    711 
    712     # Element.getchildren() and ElementTree.getiterator() are deprecated.
    @checkwarnings(("This method will be removed in future versions.  "
                    "Use .+ instead.",
                    (DeprecationWarning, PendingDeprecationWarning)))
    def test_getchildren(self):
        # Test Element.getchildren()

        with open(SIMPLE_XMLFILE, "rb") as f:
            tree = ET.parse(f)

        # Children of each element, in the order the elements are visited:
        # the root first, then its three childless children.
        expected_children = [
            ['element', 'element', 'empty-element'],
            [],
            [],
            [],
        ]
        via_iter = [summarize_list(node.getchildren())
                    for node in tree.getroot().iter()]
        self.assertEqual(via_iter, expected_children)
        via_getiterator = [summarize_list(node.getchildren())
                           for node in tree.getiterator()]
        self.assertEqual(via_getiterator, expected_children)

        root = ET.XML(SAMPLE_XML)
        self.assertEqual(len(root.getchildren()), 3)
        self.assertEqual(len(root[2].getchildren()), 1)
        self.assertEqual(root[:], root.getchildren())
        first = root[0]
        last = root[2]
        # Drop the middle child; the two others shift together.
        del root[1:2]
        self.assertEqual(len(root.getchildren()), 2)
        self.assertEqual(first, root[0])
        self.assertEqual(last, root[1])
        # Swap the two remaining children through slice assignment.
        root[0:2] = [last, first]
        self.assertEqual(last, root[0])
        self.assertEqual(first, root[1])
        self.assertNotEqual(first, root[0])
        root.clear()
        self.assertEqual(root.getchildren(), [])
    752 
    753     def test_writestring(self):
    754         elem = ET.XML("<html><body>text</body></html>")
    755         self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
    756         elem = ET.fromstring("<html><body>text</body></html>")
    757         self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
    758 
    759     def test_encoding(self):
    760         def check(encoding, body=''):
    761             xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
    762                    (encoding, body))
    763             self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
    764             self.assertEqual(ET.XML(xml).text, body)
    765         check("ascii", 'a')
    766         check("us-ascii", 'a')
    767         check("iso-8859-1", '\xbd')
    768         check("iso-8859-15", '\u20ac')
    769         check("cp437", '\u221a')
    770         check("mac-roman", '\u02da')
    771 
    772         def xml(encoding):
    773             return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    774         def bxml(encoding):
    775             return xml(encoding).encode(encoding)
    776         supported_encodings = [
    777             'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
    778             'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
    779             'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
    780             'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
    781             'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
    782             'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
    783             'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
    784             'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    785             'cp1256', 'cp1257', 'cp1258',
    786             'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
    787             'mac-roman', 'mac-turkish',
    788             'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
    789             'iso2022-jp-3', 'iso2022-jp-ext',
    790             'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
    791             'hz', 'ptcp154',
    792         ]
    793         for encoding in supported_encodings:
    794             self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
    795 
    796         unsupported_ascii_compatible_encodings = [
    797             'big5', 'big5hkscs',
    798             'cp932', 'cp949', 'cp950',
    799             'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
    800             'gb2312', 'gbk', 'gb18030',
    801             'iso2022-kr', 'johab',
    802             'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
    803             'utf-7',
    804         ]
    805         for encoding in unsupported_ascii_compatible_encodings:
    806             self.assertRaises(ValueError, ET.XML, bxml(encoding))
    807 
    808         unsupported_ascii_incompatible_encodings = [
    809             'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
    810             'utf_32', 'utf_32_be', 'utf_32_le',
    811         ]
    812         for encoding in unsupported_ascii_incompatible_encodings:
    813             self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
    814 
    815         self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
    816         self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
    817 
    def test_methods(self):
        """Compare the output of each serialization method on one tree."""
        root = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
        root.tail = "\n"

        # (keyword arguments for serialize(), expected output), checked in order.
        expectations = (
            ({},
                '<html><link /><script>1 &lt; 2</script></html>\n'),
            ({'method': None},
                '<html><link /><script>1 &lt; 2</script></html>\n'),
            ({'method': "xml"},
                '<html><link /><script>1 &lt; 2</script></html>\n'),
            ({'method': "html"},
                '<html><link><script>1 < 2</script></html>\n'),
            ({'method': "text"},
                '1 < 2\n'),
        )
        for options, expected in expectations:
            self.assertEqual(serialize(root, **options), expected)
    832 
    def test_issue18347(self):
        """Mixed-case tag names keep their case in XML and HTML output."""
        root = ET.XML('<html><CamelCase>text</CamelCase></html>')
        expected = '<html><CamelCase>text</CamelCase></html>'

        default_output = serialize(root)
        self.assertEqual(default_output, expected)

        html_output = serialize(root, method="html")
        self.assertEqual(html_output, expected)
    839 
    def test_entity(self):
        """Check character references and undefined/custom/external entities."""

        def parse_error_message(source):
            # Parse *source*, require ET.ParseError, and return its message.
            with self.assertRaises(ET.ParseError) as caught:
                ET.XML(source)
            return str(caught.exception)

        # 1) good entities

        document = ET.XML("<document title='&#x8230;'>test</document>")
        ascii_output = serialize(document, encoding="us-ascii")
        self.assertEqual(ascii_output,
                b'<document title="&#33328;">test</document>')
        self.serialize_check(document,
                '<document title="\u8230">test</document>')

        # 2) bad entities

        self.assertEqual(parse_error_message("<document>&entity;</document>"),
                'undefined entity: line 1, column 10')

        self.assertEqual(parse_error_message(ENTITY_XML),
                'undefined entity &entity;: line 5, column 10')

        # 3) custom entity

        custom_parser = ET.XMLParser()
        custom_parser.entity["entity"] = "text"
        custom_parser.feed(ENTITY_XML)
        resolved = custom_parser.close()
        self.serialize_check(resolved, '<document>text</document>')

        # 4) external (SYSTEM) entity

        self.assertEqual(parse_error_message(EXTERNAL_ENTITY_XML),
                'undefined entity &entity;: line 4, column 10')
    876 
    def test_namespace(self):
        """Check how namespace prefixes are chosen when serializing."""

        # (source document, expected serialization), checked in order:
        #   1.1  the built-in xml namespace
        #   2.1, 2.2  other "well-known" namespaces keep their usual prefix
        #   2.3  a namespace URI that is not well-known gets a generated prefix
        prefix_cases = (
            ("<tag xml:lang='en' />",
             '<tag xml:lang="en" />'),
            ("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />",
             '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'),
            ("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />",
             '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'),
            ("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />",
             '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'),
        )
        for source, expected in prefix_cases:
            self.serialize_check(ET.XML(source), expected)

        # 3) unknown namespaces
        sample = ET.XML(SAMPLE_XML_NS)
        expected_lines = (
            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n',
            '  <ns0:tag>text</ns0:tag>\n',
            '  <ns0:tag />\n',
            '  <ns0:section>\n',
            '    <ns0:tag>subtext</ns0:tag>\n',
            '  </ns0:section>\n',
            '</ns0:body>',
        )
        self.serialize_check(sample, ''.join(expected_lines))
    909 
    def test_qname(self):
        """Check QName handling in tags, attribute names and attribute values."""
        check = self.serialize_check

        # 1) decorated tags

        plain_tag = '<ns0:tag xmlns:ns0="uri" />'
        check(ET.Element("{uri}tag"), plain_tag) # 1.1
        check(ET.Element(ET.QName("{uri}tag")), plain_tag) # 1.2
        check(ET.Element(ET.QName("uri", "tag")), plain_tag) # 1.3

        node = ET.Element(ET.QName("uri", "tag"))
        for local_name in ("tag1", "tag2"):
            ET.SubElement(node, ET.QName("uri", local_name))
        check(node,
            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4

        # 2) decorated attributes

        node.clear()
        node.attrib["{uri}key"] = "value"
        check(node, '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1

        node.clear()
        node.attrib[ET.QName("{uri}key")] = "value"
        check(node, '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2

        # 3) decorated values are not converted by default, but the
        # QName wrapper can be used for values

        node.clear()
        node.attrib["{uri}key"] = "{uri}value"
        check(node, '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1

        node.clear()
        node.attrib["{uri}key"] = ET.QName("{uri}value")
        check(node, '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2

        node.clear()
        child = ET.Element("tag")
        child.attrib["{uri1}key"] = ET.QName("{uri2}value")
        # The very same child object is appended twice on purpose.
        for _ in range(2):
            node.append(child)
        check(node,
            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
            '<tag ns1:key="ns2:value" />'
            '<tag ns1:key="ns2:value" />'
            '</ns0:tag>') # 3.3

        # 4) Direct QName tests

        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
        reference = ET.QName('ns', 'tag')
        same = ET.QName('ns', 'tag')
        self.assertEqual(reference, same)
        different = ET.QName('ns', 'other-tag')
        self.assertNotEqual(reference, different)
        self.assertNotEqual(reference, 'ns:tag')
        self.assertEqual(reference, '{ns}tag')
    974 
    975     def test_doctype_public(self):
    976         # Test PUBLIC doctype.
    977 
    978         elem = ET.XML('<!DOCTYPE html PUBLIC'
    979                 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    980                 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    981                 '<html>text</html>')
    982 
    983     def test_xpath_tokenizer(self):
    984         # Test the XPath tokenizer.
    985         from xml.etree import ElementPath
    986         def check(p, expected):
    987             self.assertEqual([op or tag
    988                               for op, tag in ElementPath.xpath_tokenizer(p)],
    989                              expected)
    990 
    991         # tests from the xml specification
    992         check("*", ['*'])
    993         check("text()", ['text', '()'])
    994         check("@name", ['@', 'name'])
    995         check("@*", ['@', '*'])
    996         check("para[1]", ['para', '[', '1', ']'])
    997         check("para[last()]", ['para', '[', 'last', '()', ']'])
    998         check("*/para", ['*', '/', 'para'])
    999         check("/doc/chapter[5]/section[2]",
   1000               ['/', 'doc', '/', 'chapter', '[', '5', ']',
   1001                '/', 'section', '[', '2', ']'])
   1002         check("chapter//para", ['chapter', '//', 'para'])
   1003         check("//para", ['//', 'para'])
   1004         check("//olist/item", ['//', 'olist', '/', 'item'])
   1005         check(".", ['.'])
   1006         check(".//para", ['.', '//', 'para'])
   1007         check("..", ['..'])
   1008         check("../@lang", ['..', '/', '@', 'lang'])
   1009         check("chapter[title]", ['chapter', '[', 'title', ']'])
   1010         check("employee[@secretary and @assistant]", ['employee',
   1011               '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
   1012 
   1013         # additional tests
   1014         check("{http://spam}egg", ['{http://spam}egg'])
   1015         check("./spam.egg", ['.', '/', 'spam.egg'])
   1016         check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
   1017 
   1018     def test_processinginstruction(self):
   1019         # Test ProcessingInstruction directly
   1020 
   1021         self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
   1022                 b'<?test instruction?>')
   1023         self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
   1024                 b'<?test instruction?>')
   1025 
   1026         # Issue #2746
   1027 
   1028         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
   1029                 b'<?test <testing&>?>')
   1030         self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
   1031                 b"<?xml version='1.0' encoding='latin-1'?>\n"
   1032                 b"<?test <testing&>\xe3?>")
   1033 
   1034     def test_html_empty_elems_serialization(self):
   1035         # issue 15970
   1036         # from http://www.w3.org/TR/html401/index/elements.html
   1037         for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
   1038                         'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
   1039             for elem in [element, element.lower()]:
   1040                 expected = '<%s>' % elem
   1041                 serialized = serialize(ET.XML('<%s />' % elem), method='html')
   1042                 self.assertEqual(serialized, expected)
   1043                 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
   1044                                        method='html')
   1045                 self.assertEqual(serialized, expected)
   1046 
   1047 
   1048 class XMLPullParserTest(unittest.TestCase):
   1049 
   1050     def _feed(self, parser, data, chunk_size=None):
   1051         if chunk_size is None:
   1052             parser.feed(data)
   1053         else:
   1054             for i in range(0, len(data), chunk_size):
   1055                 parser.feed(data[i:i+chunk_size])
   1056 
   1057     def assert_event_tags(self, parser, expected):
   1058         events = parser.read_events()
   1059         self.assertEqual([(action, elem.tag) for action, elem in events],
   1060                          expected)
   1061 
   1062     def test_simple_xml(self):
   1063         for chunk_size in (None, 1, 5):
   1064             with self.subTest(chunk_size=chunk_size):
   1065                 parser = ET.XMLPullParser()
   1066                 self.assert_event_tags(parser, [])
   1067                 self._feed(parser, "<!-- comment -->\n", chunk_size)
   1068                 self.assert_event_tags(parser, [])
   1069                 self._feed(parser,
   1070                            "<root>\n  <element key='value'>text</element",
   1071                            chunk_size)
   1072                 self.assert_event_tags(parser, [])
   1073                 self._feed(parser, ">\n", chunk_size)
   1074                 self.assert_event_tags(parser, [('end', 'element')])
   1075                 self._feed(parser, "<element>text</element>tail\n", chunk_size)
   1076                 self._feed(parser, "<empty-element/>\n", chunk_size)
   1077                 self.assert_event_tags(parser, [
   1078                     ('end', 'element'),
   1079                     ('end', 'empty-element'),
   1080                     ])
   1081                 self._feed(parser, "</root>\n", chunk_size)
   1082                 self.assert_event_tags(parser, [('end', 'root')])
   1083                 self.assertIsNone(parser.close())
   1084 
   1085     def test_feed_while_iterating(self):
   1086         parser = ET.XMLPullParser()
   1087         it = parser.read_events()
   1088         self._feed(parser, "<root>\n  <element key='value'>text</element>\n")
   1089         action, elem = next(it)
   1090         self.assertEqual((action, elem.tag), ('end', 'element'))
   1091         self._feed(parser, "</root>\n")
   1092         action, elem = next(it)
   1093         self.assertEqual((action, elem.tag), ('end', 'root'))
   1094         with self.assertRaises(StopIteration):
   1095             next(it)
   1096 
   1097     def test_simple_xml_with_ns(self):
   1098         parser = ET.XMLPullParser()
   1099         self.assert_event_tags(parser, [])
   1100         self._feed(parser, "<!-- comment -->\n")
   1101         self.assert_event_tags(parser, [])
   1102         self._feed(parser, "<root xmlns='namespace'>\n")
   1103         self.assert_event_tags(parser, [])
   1104         self._feed(parser, "<element key='value'>text</element")
   1105         self.assert_event_tags(parser, [])
   1106         self._feed(parser, ">\n")
   1107         self.assert_event_tags(parser, [('end', '{namespace}element')])
   1108         self._feed(parser, "<element>text</element>tail\n")
   1109         self._feed(parser, "<empty-element/>\n")
   1110         self.assert_event_tags(parser, [
   1111             ('end', '{namespace}element'),
   1112             ('end', '{namespace}empty-element'),
   1113             ])
   1114         self._feed(parser, "</root>\n")
   1115         self.assert_event_tags(parser, [('end', '{namespace}root')])
   1116         self.assertIsNone(parser.close())
   1117 
   1118     def test_ns_events(self):
   1119         parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
   1120         self._feed(parser, "<!-- comment -->\n")
   1121         self._feed(parser, "<root xmlns='namespace'>\n")
   1122         self.assertEqual(
   1123             list(parser.read_events()),
   1124             [('start-ns', ('', 'namespace'))])
   1125         self._feed(parser, "<element key='value'>text</element")
   1126         self._feed(parser, ">\n")
   1127         self._feed(parser, "<element>text</element>tail\n")
   1128         self._feed(parser, "<empty-element/>\n")
   1129         self._feed(parser, "</root>\n")
   1130         self.assertEqual(list(parser.read_events()), [('end-ns', None)])
   1131         self.assertIsNone(parser.close())
   1132 
   1133     def test_events(self):
   1134         parser = ET.XMLPullParser(events=())
   1135         self._feed(parser, "<root/>\n")
   1136         self.assert_event_tags(parser, [])
   1137 
   1138         parser = ET.XMLPullParser(events=('start', 'end'))
   1139         self._feed(parser, "<!-- comment -->\n")
   1140         self.assert_event_tags(parser, [])
   1141         self._feed(parser, "<root>\n")
   1142         self.assert_event_tags(parser, [('start', 'root')])
   1143         self._feed(parser, "<element key='value'>text</element")
   1144         self.assert_event_tags(parser, [('start', 'element')])
   1145         self._feed(parser, ">\n")
   1146         self.assert_event_tags(parser, [('end', 'element')])
   1147         self._feed(parser,
   1148                    "<element xmlns='foo'>text<empty-element/></element>tail\n")
   1149         self.assert_event_tags(parser, [
   1150             ('start', '{foo}element'),
   1151             ('start', '{foo}empty-element'),
   1152             ('end', '{foo}empty-element'),
   1153             ('end', '{foo}element'),
   1154             ])
   1155         self._feed(parser, "</root>")
   1156         self.assertIsNone(parser.close())
   1157         self.assert_event_tags(parser, [('end', 'root')])
   1158 
   1159         parser = ET.XMLPullParser(events=('start',))
   1160         self._feed(parser, "<!-- comment -->\n")
   1161         self.assert_event_tags(parser, [])
   1162         self._feed(parser, "<root>\n")
   1163         self.assert_event_tags(parser, [('start', 'root')])
   1164         self._feed(parser, "<element key='value'>text</element")
   1165         self.assert_event_tags(parser, [('start', 'element')])
   1166         self._feed(parser, ">\n")
   1167         self.assert_event_tags(parser, [])
   1168         self._feed(parser,
   1169                    "<element xmlns='foo'>text<empty-element/></element>tail\n")
   1170         self.assert_event_tags(parser, [
   1171             ('start', '{foo}element'),
   1172             ('start', '{foo}empty-element'),
   1173             ])
   1174         self._feed(parser, "</root>")
   1175         self.assertIsNone(parser.close())
   1176 
   1177     def test_events_sequence(self):
   1178         # Test that events can be some sequence that's not just a tuple or list
   1179         eventset = {'end', 'start'}
   1180         parser = ET.XMLPullParser(events=eventset)
   1181         self._feed(parser, "<foo>bar</foo>")
   1182         self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
   1183 
   1184         class DummyIter:
   1185             def __init__(self):
   1186                 self.events = iter(['start', 'end', 'start-ns'])
   1187             def __iter__(self):
   1188                 return self
   1189             def __next__(self):
   1190                 return next(self.events)
   1191 
   1192         parser = ET.XMLPullParser(events=DummyIter())
   1193         self._feed(parser, "<foo>bar</foo>")
   1194         self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
   1195 
   1196 
   1197     def test_unknown_event(self):
   1198         with self.assertRaises(ValueError):
   1199             ET.XMLPullParser(events=('start', 'end', 'bogus'))
   1200 
   1201 
   1202 #
   1203 # xinclude tests (samples from appendix C of the xinclude specification)
   1204 
   1205 XINCLUDE = {}
   1206 
   1207 XINCLUDE["C1.xml"] = """\
   1208 <?xml version='1.0'?>
   1209 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1210   <p>120 Mz is adequate for an average home user.</p>
   1211   <xi:include href="disclaimer.xml"/>
   1212 </document>
   1213 """
   1214 
   1215 XINCLUDE["disclaimer.xml"] = """\
   1216 <?xml version='1.0'?>
   1217 <disclaimer>
   1218   <p>The opinions represented herein represent those of the individual
   1219   and should not be interpreted as official policy endorsed by this
   1220   organization.</p>
   1221 </disclaimer>
   1222 """
   1223 
   1224 XINCLUDE["C2.xml"] = """\
   1225 <?xml version='1.0'?>
   1226 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1227   <p>This document has been accessed
   1228   <xi:include href="count.txt" parse="text"/> times.</p>
   1229 </document>
   1230 """
   1231 
   1232 XINCLUDE["count.txt"] = "324387"
   1233 
   1234 XINCLUDE["C2b.xml"] = """\
   1235 <?xml version='1.0'?>
   1236 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1237   <p>This document has been <em>accessed</em>
   1238   <xi:include href="count.txt" parse="text"/> times.</p>
   1239 </document>
   1240 """
   1241 
   1242 XINCLUDE["C3.xml"] = """\
   1243 <?xml version='1.0'?>
   1244 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1245   <p>The following is the source of the "data.xml" resource:</p>
   1246   <example><xi:include href="data.xml" parse="text"/></example>
   1247 </document>
   1248 """
   1249 
   1250 XINCLUDE["data.xml"] = """\
   1251 <?xml version='1.0'?>
   1252 <data>
   1253   <item><![CDATA[Brooks & Shields]]></item>
   1254 </data>
   1255 """
   1256 
   1257 XINCLUDE["C5.xml"] = """\
   1258 <?xml version='1.0'?>
   1259 <div xmlns:xi="http://www.w3.org/2001/XInclude">
   1260   <xi:include href="example.txt" parse="text">
   1261     <xi:fallback>
   1262       <xi:include href="fallback-example.txt" parse="text">
   1263         <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
   1264       </xi:include>
   1265     </xi:fallback>
   1266   </xi:include>
   1267 </div>
   1268 """
   1269 
   1270 XINCLUDE["default.xml"] = """\
   1271 <?xml version='1.0'?>
   1272 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1273   <p>Example.</p>
   1274   <xi:include href="{}"/>
   1275 </document>
   1276 """.format(html.escape(SIMPLE_XMLFILE, True))
   1277 
   1278 #
   1279 # badly formatted xi:include tags
   1280 
   1281 XINCLUDE_BAD = {}
   1282 
   1283 XINCLUDE_BAD["B1.xml"] = """\
   1284 <?xml version='1.0'?>
   1285 <document xmlns:xi="http://www.w3.org/2001/XInclude">
   1286   <p>120 Mz is adequate for an average home user.</p>
   1287   <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
   1288 </document>
   1289 """
   1290 
   1291 XINCLUDE_BAD["B2.xml"] = """\
   1292 <?xml version='1.0'?>
   1293 <div xmlns:xi="http://www.w3.org/2001/XInclude">
   1294     <xi:fallback></xi:fallback>
   1295 </div>
   1296 """
   1297 
   1298 class XIncludeTest(unittest.TestCase):
   1299 
   1300     def xinclude_loader(self, href, parse="xml", encoding=None):
   1301         try:
   1302             data = XINCLUDE[href]
   1303         except KeyError:
   1304             raise OSError("resource not found")
   1305         if parse == "xml":
   1306             data = ET.XML(data)
   1307         return data
   1308 
   1309     def none_loader(self, href, parser, encoding=None):
   1310         return None
   1311 
   1312     def _my_loader(self, href, parse):
   1313         # Used to avoid a test-dependency problem where the default loader
   1314         # of ElementInclude uses the pyET parser for cET tests.
   1315         if parse == 'xml':
   1316             with open(href, 'rb') as f:
   1317                 return ET.parse(f).getroot()
   1318         else:
   1319             return None
   1320 
   1321     def test_xinclude_default(self):
   1322         from xml.etree import ElementInclude
   1323         doc = self.xinclude_loader('default.xml')
   1324         ElementInclude.include(doc, self._my_loader)
   1325         self.assertEqual(serialize(doc),
   1326             '<document>\n'
   1327             '  <p>Example.</p>\n'
   1328             '  <root>\n'
   1329             '   <element key="value">text</element>\n'
   1330             '   <element>text</element>tail\n'
   1331             '   <empty-element />\n'
   1332             '</root>\n'
   1333             '</document>')
   1334 
   1335     def test_xinclude(self):
   1336         from xml.etree import ElementInclude
   1337 
   1338         # Basic inclusion example (XInclude C.1)
   1339         document = self.xinclude_loader("C1.xml")
   1340         ElementInclude.include(document, self.xinclude_loader)
   1341         self.assertEqual(serialize(document),
   1342             '<document>\n'
   1343             '  <p>120 Mz is adequate for an average home user.</p>\n'
   1344             '  <disclaimer>\n'
   1345             '  <p>The opinions represented herein represent those of the individual\n'
   1346             '  and should not be interpreted as official policy endorsed by this\n'
   1347             '  organization.</p>\n'
   1348             '</disclaimer>\n'
   1349             '</document>') # C1
   1350 
   1351         # Textual inclusion example (XInclude C.2)
   1352         document = self.xinclude_loader("C2.xml")
   1353         ElementInclude.include(document, self.xinclude_loader)
   1354         self.assertEqual(serialize(document),
   1355             '<document>\n'
   1356             '  <p>This document has been accessed\n'
   1357             '  324387 times.</p>\n'
   1358             '</document>') # C2
   1359 
   1360         # Textual inclusion after sibling element (based on modified XInclude C.2)
   1361         document = self.xinclude_loader("C2b.xml")
   1362         ElementInclude.include(document, self.xinclude_loader)
   1363         self.assertEqual(serialize(document),
   1364             '<document>\n'
   1365             '  <p>This document has been <em>accessed</em>\n'
   1366             '  324387 times.</p>\n'
   1367             '</document>') # C2b
   1368 
   1369         # Textual inclusion of XML example (XInclude C.3)
   1370         document = self.xinclude_loader("C3.xml")
   1371         ElementInclude.include(document, self.xinclude_loader)
   1372         self.assertEqual(serialize(document),
   1373             '<document>\n'
   1374             '  <p>The following is the source of the "data.xml" resource:</p>\n'
   1375             "  <example>&lt;?xml version='1.0'?&gt;\n"
   1376             '&lt;data&gt;\n'
   1377             '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
   1378             '&lt;/data&gt;\n'
   1379             '</example>\n'
   1380             '</document>') # C3
   1381 
   1382         # Fallback example (XInclude C.5)
   1383         # Note! Fallback support is not yet implemented
   1384         document = self.xinclude_loader("C5.xml")
   1385         with self.assertRaises(OSError) as cm:
   1386             ElementInclude.include(document, self.xinclude_loader)
   1387         self.assertEqual(str(cm.exception), 'resource not found')
   1388         self.assertEqual(serialize(document),
   1389             '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
   1390             '  <ns0:include href="example.txt" parse="text">\n'
   1391             '    <ns0:fallback>\n'
   1392             '      <ns0:include href="fallback-example.txt" parse="text">\n'
   1393             '        <ns0:fallback><a href="mailto:bob (at] example.org">Report error</a></ns0:fallback>\n'
   1394             '      </ns0:include>\n'
   1395             '    </ns0:fallback>\n'
   1396             '  </ns0:include>\n'
   1397             '</div>') # C5
   1398 
   1399     def test_xinclude_failures(self):
   1400         from xml.etree import ElementInclude
   1401 
   1402         # Test failure to locate included XML file.
   1403         document = ET.XML(XINCLUDE["C1.xml"])
   1404         with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
   1405             ElementInclude.include(document, loader=self.none_loader)
   1406         self.assertEqual(str(cm.exception),
   1407                 "cannot load 'disclaimer.xml' as 'xml'")
   1408 
   1409         # Test failure to locate included text file.
   1410         document = ET.XML(XINCLUDE["C2.xml"])
   1411         with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
   1412             ElementInclude.include(document, loader=self.none_loader)
   1413         self.assertEqual(str(cm.exception),
   1414                 "cannot load 'count.txt' as 'text'")
   1415 
   1416         # Test bad parse type.
   1417         document = ET.XML(XINCLUDE_BAD["B1.xml"])
   1418         with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
   1419             ElementInclude.include(document, loader=self.none_loader)
   1420         self.assertEqual(str(cm.exception),
   1421                 "unknown parse type in xi:include tag ('BAD_TYPE')")
   1422 
   1423         # Test xi:fallback outside xi:include.
   1424         document = ET.XML(XINCLUDE_BAD["B2.xml"])
   1425         with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
   1426             ElementInclude.include(document, loader=self.none_loader)
   1427         self.assertEqual(str(cm.exception),
   1428                 "xi:fallback tag must be child of xi:include "
   1429                 "('{http://www.w3.org/2001/XInclude}fallback')")
   1430 
   1431 # --------------------------------------------------------------------
   1432 # reported bugs
   1433 
   1434 class BugsTest(unittest.TestCase):
   1435 
   1436     def test_bug_xmltoolkit21(self):
   1437         # marshaller gives obscure errors for non-string values
   1438 
   1439         def check(elem):
   1440             with self.assertRaises(TypeError) as cm:
   1441                 serialize(elem)
   1442             self.assertEqual(str(cm.exception),
   1443                     'cannot serialize 123 (type int)')
   1444 
   1445         elem = ET.Element(123)
   1446         check(elem) # tag
   1447 
   1448         elem = ET.Element("elem")
   1449         elem.text = 123
   1450         check(elem) # text
   1451 
   1452         elem = ET.Element("elem")
   1453         elem.tail = 123
   1454         check(elem) # tail
   1455 
   1456         elem = ET.Element("elem")
   1457         elem.set(123, "123")
   1458         check(elem) # attribute key
   1459 
   1460         elem = ET.Element("elem")
   1461         elem.set("123", 123)
   1462         check(elem) # attribute value
   1463 
   1464     def test_bug_xmltoolkit25(self):
   1465         # typo in ElementTree.findtext
   1466 
   1467         elem = ET.XML(SAMPLE_XML)
   1468         tree = ET.ElementTree(elem)
   1469         self.assertEqual(tree.findtext("tag"), 'text')
   1470         self.assertEqual(tree.findtext("section/tag"), 'subtext')
   1471 
   1472     def test_bug_xmltoolkit28(self):
   1473         # .//tag causes exceptions
   1474 
   1475         tree = ET.XML("<doc><table><tbody/></table></doc>")
   1476         self.assertEqual(summarize_list(tree.findall(".//thead")), [])
   1477         self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
   1478 
   1479     def test_bug_xmltoolkitX1(self):
   1480         # dump() doesn't flush the output buffer
   1481 
   1482         tree = ET.XML("<doc><table><tbody/></table></doc>")
   1483         with support.captured_stdout() as stdout:
   1484             ET.dump(tree)
   1485             self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
   1486 
   1487     def test_bug_xmltoolkit39(self):
   1488         # non-ascii element and attribute names doesn't work
   1489 
   1490         tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
   1491         self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
   1492 
   1493         tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
   1494                       b"<tag \xe4ttr='v&#228;lue' />")
   1495         self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
   1496         self.assertEqual(ET.tostring(tree, "utf-8"),
   1497                 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
   1498 
   1499         tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
   1500                       b'<t\xe4g>text</t\xe4g>')
   1501         self.assertEqual(ET.tostring(tree, "utf-8"),
   1502                 b'<t\xc3\xa4g>text</t\xc3\xa4g>')
   1503 
   1504         tree = ET.Element("t\u00e4g")
   1505         self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
   1506 
   1507         tree = ET.Element("tag")
   1508         tree.set("\u00e4ttr", "v\u00e4lue")
   1509         self.assertEqual(ET.tostring(tree, "utf-8"),
   1510                 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
   1511 
   1512     def test_bug_xmltoolkit54(self):
   1513         # problems handling internally defined entities
   1514 
   1515         e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
   1516                    '<doc>&ldots;</doc>')
   1517         self.assertEqual(serialize(e, encoding="us-ascii"),
   1518                 b'<doc>&#33328;</doc>')
   1519         self.assertEqual(serialize(e), '<doc>\u8230</doc>')
   1520 
   1521     def test_bug_xmltoolkit55(self):
   1522         # make sure we're reporting the first error, not the last
   1523 
   1524         with self.assertRaises(ET.ParseError) as cm:
   1525             ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
   1526                    b'<doc>&ldots;&ndots;&rdots;</doc>')
   1527         self.assertEqual(str(cm.exception),
   1528                 'undefined entity &ldots;: line 1, column 36')
   1529 
   1530     def test_bug_xmltoolkit60(self):
   1531         # Handle crash in stream source.
   1532 
   1533         class ExceptionFile:
   1534             def read(self, x):
   1535                 raise OSError
   1536 
   1537         self.assertRaises(OSError, ET.parse, ExceptionFile())
   1538 
   1539     def test_bug_xmltoolkit62(self):
   1540         # Don't crash when using custom entities.
   1541 
   1542         ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
   1543         parser = ET.XMLParser()
   1544         parser.entity.update(ENTITIES)
   1545         parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
   1546 <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
   1547 <patent-application-publication>
   1548 <subdoc-abstract>
   1549 <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
   1550 </subdoc-abstract>
   1551 </patent-application-publication>""")
   1552         t = parser.close()
   1553         self.assertEqual(t.find('.//paragraph').text,
   1554             'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
   1555 
   1556     @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
   1557     def test_bug_xmltoolkit63(self):
   1558         # Check reference leak.
   1559         def xmltoolkit63():
   1560             tree = ET.TreeBuilder()
   1561             tree.start("tag", {})
   1562             tree.data("text")
   1563             tree.end("tag")
   1564 
   1565         xmltoolkit63()
   1566         count = sys.getrefcount(None)
   1567         for i in range(1000):
   1568             xmltoolkit63()
   1569         self.assertEqual(sys.getrefcount(None), count)
   1570 
   1571     def test_bug_200708_newline(self):
   1572         # Preserve newlines in attributes.
   1573 
   1574         e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
   1575         self.assertEqual(ET.tostring(e),
   1576                 b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
   1577         self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
   1578                 'def _f():\n  return 3\n')
   1579         self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
   1580                 b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
   1581 
   1582     def test_bug_200708_close(self):
   1583         # Test default builder.
   1584         parser = ET.XMLParser() # default
   1585         parser.feed("<element>some text</element>")
   1586         self.assertEqual(parser.close().tag, 'element')
   1587 
   1588         # Test custom builder.
   1589         class EchoTarget:
   1590             def close(self):
   1591                 return ET.Element("element") # simulate root
   1592         parser = ET.XMLParser(target=EchoTarget())
   1593         parser.feed("<element>some text</element>")
   1594         self.assertEqual(parser.close().tag, 'element')
   1595 
   1596     def test_bug_200709_default_namespace(self):
   1597         e = ET.Element("{default}elem")
   1598         s = ET.SubElement(e, "{default}elem")
   1599         self.assertEqual(serialize(e, default_namespace="default"), # 1
   1600                 '<elem xmlns="default"><elem /></elem>')
   1601 
   1602         e = ET.Element("{default}elem")
   1603         s = ET.SubElement(e, "{default}elem")
   1604         s = ET.SubElement(e, "{not-default}elem")
   1605         self.assertEqual(serialize(e, default_namespace="default"), # 2
   1606             '<elem xmlns="default" xmlns:ns1="not-default">'
   1607             '<elem />'
   1608             '<ns1:elem />'
   1609             '</elem>')
   1610 
   1611         e = ET.Element("{default}elem")
   1612         s = ET.SubElement(e, "{default}elem")
   1613         s = ET.SubElement(e, "elem") # unprefixed name
   1614         with self.assertRaises(ValueError) as cm:
   1615             serialize(e, default_namespace="default") # 3
   1616         self.assertEqual(str(cm.exception),
   1617                 'cannot use non-qualified names with default_namespace option')
   1618 
   1619     def test_bug_200709_register_namespace(self):
   1620         e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
   1621         self.assertEqual(ET.tostring(e),
   1622             b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
   1623         ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
   1624         e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
   1625         self.assertEqual(ET.tostring(e),
   1626             b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')
   1627 
   1628         # And the Dublin Core namespace is in the default list:
   1629 
   1630         e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
   1631         self.assertEqual(ET.tostring(e),
   1632             b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
   1633 
   1634     def test_bug_200709_element_comment(self):
   1635         # Not sure if this can be fixed, really (since the serializer needs
   1636         # ET.Comment, not cET.comment).
   1637 
   1638         a = ET.Element('a')
   1639         a.append(ET.Comment('foo'))
   1640         self.assertEqual(a[0].tag, ET.Comment)
   1641 
   1642         a = ET.Element('a')
   1643         a.append(ET.PI('foo'))
   1644         self.assertEqual(a[0].tag, ET.PI)
   1645 
   1646     def test_bug_200709_element_insert(self):
   1647         a = ET.Element('a')
   1648         b = ET.SubElement(a, 'b')
   1649         c = ET.SubElement(a, 'c')
   1650         d = ET.Element('d')
   1651         a.insert(0, d)
   1652         self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
   1653         a.insert(-1, d)
   1654         self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
   1655 
   1656     def test_bug_200709_iter_comment(self):
   1657         a = ET.Element('a')
   1658         b = ET.SubElement(a, 'b')
   1659         comment_b = ET.Comment("TEST-b")
   1660         b.append(comment_b)
   1661         self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
   1662 
   1663     # --------------------------------------------------------------------
   1664     # reported on bugs.python.org
   1665 
   1666     def test_bug_1534630(self):
   1667         bob = ET.TreeBuilder()
   1668         e = bob.data("data")
   1669         e = bob.start("tag", {})
   1670         e = bob.end("tag")
   1671         e = bob.close()
   1672         self.assertEqual(serialize(e), '<tag />')
   1673 
   1674     def test_issue6233(self):
   1675         e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
   1676                    b'<body>t\xc3\xa3g</body>')
   1677         self.assertEqual(ET.tostring(e, 'ascii'),
   1678                 b"<?xml version='1.0' encoding='ascii'?>\n"
   1679                 b'<body>t&#227;g</body>')
   1680         e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
   1681                    b'<body>t\xe3g</body>')
   1682         self.assertEqual(ET.tostring(e, 'ascii'),
   1683                 b"<?xml version='1.0' encoding='ascii'?>\n"
   1684                 b'<body>t&#227;g</body>')
   1685 
   1686     def test_issue3151(self):
   1687         e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
   1688         self.assertEqual(e.tag, '{${stuff}}localname')
   1689         t = ET.ElementTree(e)
   1690         self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')
   1691 
   1692     def test_issue6565(self):
   1693         elem = ET.XML("<body><tag/></body>")
   1694         self.assertEqual(summarize_list(elem), ['tag'])
   1695         newelem = ET.XML(SAMPLE_XML)
   1696         elem[:] = newelem[:]
   1697         self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])
   1698 
   1699     def test_issue10777(self):
   1700         # Registering a namespace twice caused a "dictionary changed size during
   1701         # iteration" bug.
   1702 
   1703         ET.register_namespace('test10777', 'http://myuri/')
   1704         ET.register_namespace('test10777', 'http://myuri/')
   1705 
   1706     def test_lost_text(self):
   1707         # Issue #25902: Borrowed text can disappear
   1708         class Text:
   1709             def __bool__(self):
   1710                 e.text = 'changed'
   1711                 return True
   1712 
   1713         e = ET.Element('tag')
   1714         e.text = Text()
   1715         i = e.itertext()
   1716         t = next(i)
   1717         self.assertIsInstance(t, Text)
   1718         self.assertIsInstance(e.text, str)
   1719         self.assertEqual(e.text, 'changed')
   1720 
   1721     def test_lost_tail(self):
   1722         # Issue #25902: Borrowed tail can disappear
   1723         class Text:
   1724             def __bool__(self):
   1725                 e[0].tail = 'changed'
   1726                 return True
   1727 
   1728         e = ET.Element('root')
   1729         e.append(ET.Element('tag'))
   1730         e[0].tail = Text()
   1731         i = e.itertext()
   1732         t = next(i)
   1733         self.assertIsInstance(t, Text)
   1734         self.assertIsInstance(e[0].tail, str)
   1735         self.assertEqual(e[0].tail, 'changed')
   1736 
   1737     def test_lost_elem(self):
   1738         # Issue #25902: Borrowed element can disappear
   1739         class Tag:
   1740             def __eq__(self, other):
   1741                 e[0] = ET.Element('changed')
   1742                 next(i)
   1743                 return True
   1744 
   1745         e = ET.Element('root')
   1746         e.append(ET.Element(Tag()))
   1747         e.append(ET.Element('tag'))
   1748         i = e.iter('tag')
   1749         try:
   1750             t = next(i)
   1751         except ValueError:
   1752             self.skipTest('generators are not reentrant')
   1753         self.assertIsInstance(t.tag, Tag)
   1754         self.assertIsInstance(e[0].tag, str)
   1755         self.assertEqual(e[0].tag, 'changed')
   1756 
   1757     def check_expat224_utf8_bug(self, text):
   1758         xml = b'<a b="%s"/>' % text
   1759         root = ET.XML(xml)
   1760         self.assertEqual(root.get('b'), text.decode('utf-8'))
   1761 
   1762     def test_expat224_utf8_bug(self):
   1763         # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
   1764         # Check that Expat 2.2.4 fixed the bug.
   1765         #
   1766         # Test buffer bounds at odd and even positions.
   1767 
   1768         text = b'\xc3\xa0' * 1024
   1769         self.check_expat224_utf8_bug(text)
   1770 
   1771         text = b'x' + b'\xc3\xa0' * 1024
   1772         self.check_expat224_utf8_bug(text)
   1773 
   1774     def test_expat224_utf8_bug_file(self):
   1775         with open(UTF8_BUG_XMLFILE, 'rb') as fp:
   1776             raw = fp.read()
   1777         root = ET.fromstring(raw)
   1778         xmlattr = root.get('b')
   1779 
   1780         # "Parse" manually the XML file to extract the value of the 'b'
   1781         # attribute of the <a b='xxx' /> XML element
   1782         text = raw.decode('utf-8').strip()
   1783         text = text.replace('\r\n', ' ')
   1784         text = text[6:-4]
   1785         self.assertEqual(root.get('b'), text)
   1786 
   1787 
   1788 
   1789 # --------------------------------------------------------------------
   1790 
   1791 
   1792 class BasicElementTest(ElementTestCase, unittest.TestCase):
   1793     def test_augmentation_type_errors(self):
   1794         e = ET.Element('joe')
   1795         self.assertRaises(TypeError, e.append, 'b')
   1796         self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
   1797         self.assertRaises(TypeError, e.insert, 0, 'foo')
   1798 
   1799     def test_cyclic_gc(self):
   1800         class Dummy:
   1801             pass
   1802 
   1803         # Test the shortest cycle: d->element->d
   1804         d = Dummy()
   1805         d.dummyref = ET.Element('joe', attr=d)
   1806         wref = weakref.ref(d)
   1807         del d
   1808         gc_collect()
   1809         self.assertIsNone(wref())
   1810 
   1811         # A longer cycle: d->e->e2->d
   1812         e = ET.Element('joe')
   1813         d = Dummy()
   1814         d.dummyref = e
   1815         wref = weakref.ref(d)
   1816         e2 = ET.SubElement(e, 'foo', attr=d)
   1817         del d, e, e2
   1818         gc_collect()
   1819         self.assertIsNone(wref())
   1820 
   1821         # A cycle between Element objects as children of one another
   1822         # e1->e2->e3->e1
   1823         e1 = ET.Element('e1')
   1824         e2 = ET.Element('e2')
   1825         e3 = ET.Element('e3')
   1826         e1.append(e2)
   1827         e2.append(e2)
   1828         e3.append(e1)
   1829         wref = weakref.ref(e1)
   1830         del e1, e2, e3
   1831         gc_collect()
   1832         self.assertIsNone(wref())
   1833 
   1834     def test_weakref(self):
   1835         flag = False
   1836         def wref_cb(w):
   1837             nonlocal flag
   1838             flag = True
   1839         e = ET.Element('e')
   1840         wref = weakref.ref(e, wref_cb)
   1841         self.assertEqual(wref().tag, 'e')
   1842         del e
   1843         self.assertEqual(flag, True)
   1844         self.assertEqual(wref(), None)
   1845 
   1846     def test_get_keyword_args(self):
   1847         e1 = ET.Element('foo' , x=1, y=2, z=3)
   1848         self.assertEqual(e1.get('x', default=7), 1)
   1849         self.assertEqual(e1.get('w', default=7), 7)
   1850 
   1851     def test_pickle(self):
   1852         # issue #16076: the C implementation wasn't pickleable.
   1853         for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
   1854             for dumper, loader in product(self.modules, repeat=2):
   1855                 e = dumper.Element('foo', bar=42)
   1856                 e.text = "text goes here"
   1857                 e.tail = "opposite of head"
   1858                 dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
   1859                 e.append(dumper.Element('child'))
   1860                 e.findall('.//grandchild')[0].set('attr', 'other value')
   1861 
   1862                 e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
   1863                                           dumper, loader, proto)
   1864 
   1865                 self.assertEqual(e2.tag, 'foo')
   1866                 self.assertEqual(e2.attrib['bar'], 42)
   1867                 self.assertEqual(len(e2), 2)
   1868                 self.assertEqualElements(e, e2)
   1869 
   1870     def test_pickle_issue18997(self):
   1871         for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
   1872             for dumper, loader in product(self.modules, repeat=2):
   1873                 XMLTEXT = """<?xml version="1.0"?>
   1874                     <group><dogs>4</dogs>
   1875                     </group>"""
   1876                 e1 = dumper.fromstring(XMLTEXT)
   1877                 if hasattr(e1, '__getstate__'):
   1878                     self.assertEqual(e1.__getstate__()['tag'], 'group')
   1879                 e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
   1880                                           dumper, loader, proto)
   1881                 self.assertEqual(e2.tag, 'group')
   1882                 self.assertEqual(e2[0].tag, 'dogs')
   1883 
   1884 
class BadElementTest(ElementTestCase, unittest.TestCase):
    """Element and TreeBuilder used with objects that misbehave.

    Most tests make no assertion on a result: they pass as long as the
    call sequence completes (or raises the exception that is explicitly
    tolerated) instead of crashing the interpreter.
    """

    def test_extend_mutable_list(self):
        class X:
            # Reading __class__ replaces the contents of L in place and
            # reports ET.Element as the object's class.
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        # X is not a real Element, so a TypeError is an acceptable outcome.
        try:
            e.extend(L)
        except TypeError:
            pass

        # Same thing, but Y genuinely derives from ET.Element.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        class X:
            # Reading __class__ empties L and reports ET.Element.
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        # X is not a real Element, so a TypeError is an acceptable outcome.
        try:
            e.extend(L)
        except TypeError:
            pass

        # Same thing, but Y genuinely derives from ET.Element.
        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        class X(ET.Element):
            # Comparing an X removes every child of e and reports "not equal".
            def __eq__(self, o):
                del e[:]
                return False
        # The mutating object is the existing child ...
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # ... and then the argument passed to remove().
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        e = ET.Element('foo')
        # Temporarily make the element its own tag.
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        class X(str):
            # On finalization, read elem.text; NameError is ignored because
            # "elem" is only bound further down in this test.
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('tag', {})
        # Three data chunks, the middle one being an X instance.
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        elem = b.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        class X(str):
            # On finalization, read the child's tail; NameError is ignored
            # because "elem" is only bound further down in this test.
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        # Data fed after end('tag'); the assertion below expects it to be
        # that child's tail.
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_element_iter(self):
        # Issue #27863
        state = {
            'tag': 'tag',
            '_children': [None],  # non-Element
            'attrib': 'attr',
            'tail': 'tail',
            'text': 'text',
        }

        e = ET.Element('tag')
        # Install the bogus state via __setstate__ when it exists; otherwise
        # fall back to replacing the instance __dict__.
        # NOTE(review): presumably the two branches correspond to the C and
        # pure-Python implementations -- confirm.
        try:
            e.__setstate__(state)
        except AttributeError:
            e.__dict__ = state

        it = e.iter()
        # The element itself comes first; advancing to the None child fails.
        self.assertIs(next(it), e)
        self.assertRaises(AttributeError, next, it)

    def test_subscr(self):
        # Issue #27863
        class X:
            # Converting to an index removes every child of e, then yields 1.
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        class X:
            # Converting to an index empties e by slice assignment, then
            # yields 1.
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The factory hands back a plain list instead of an element.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        # With data pending, the next start() must fail with AttributeError.
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # The factory hands back a plain list instead of an element.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        # With data pending, end() must fail with AttributeError.
        self.assertRaises(AttributeError, b.end, 'tag')
        del b
        gc_collect()
   2054 
   2055 
   2056 class MutatingElementPath(str):
   2057     def __new__(cls, elem, *args):
   2058         self = str.__new__(cls, *args)
   2059         self.elem = elem
   2060         return self
   2061     def __eq__(self, o):
   2062         del self.elem[:]
   2063         return True
   2064 MutatingElementPath.__hash__ = str.__hash__
   2065 
   2066 class BadElementPath(str):
   2067     def __eq__(self, o):
   2068         raise 1/0
   2069 BadElementPath.__hash__ = str.__hash__
   2070 
   2071 class BadElementPathTest(ElementTestCase, unittest.TestCase):
   2072     def setUp(self):
   2073         super().setUp()
   2074         from xml.etree import ElementPath
   2075         self.path_cache = ElementPath._cache
   2076         ElementPath._cache = {}
   2077 
   2078     def tearDown(self):
   2079         from xml.etree import ElementPath
   2080         ElementPath._cache = self.path_cache
   2081         super().tearDown()
   2082 
   2083     def test_find_with_mutating(self):
   2084         e = ET.Element('foo')
   2085         e.extend([ET.Element('bar')])
   2086         e.find(MutatingElementPath(e, 'x'))
   2087 
   2088     def test_find_with_error(self):
   2089         e = ET.Element('foo')
   2090         e.extend([ET.Element('bar')])
   2091         try:
   2092             e.find(BadElementPath('x'))
   2093         except ZeroDivisionError:
   2094             pass
   2095 
   2096     def test_findtext_with_mutating(self):
   2097         e = ET.Element('foo')
   2098         e.extend([ET.Element('bar')])
   2099         e.findtext(MutatingElementPath(e, 'x'))
   2100 
   2101     def test_findtext_with_error(self):
   2102         e = ET.Element('foo')
   2103         e.extend([ET.Element('bar')])
   2104         try:
   2105             e.findtext(BadElementPath('x'))
   2106         except ZeroDivisionError:
   2107             pass
   2108 
   2109     def test_findall_with_mutating(self):
   2110         e = ET.Element('foo')
   2111         e.extend([ET.Element('bar')])
   2112         e.findall(MutatingElementPath(e, 'x'))
   2113 
   2114     def test_findall_with_error(self):
   2115         e = ET.Element('foo')
   2116         e.extend([ET.Element('bar')])
   2117         try:
   2118             e.findall(BadElementPath('x'))
   2119         except ZeroDivisionError:
   2120             pass
   2121 
   2122 
   2123 class ElementTreeTypeTest(unittest.TestCase):
   2124     def test_istype(self):
   2125         self.assertIsInstance(ET.ParseError, type)
   2126         self.assertIsInstance(ET.QName, type)
   2127         self.assertIsInstance(ET.ElementTree, type)
   2128         self.assertIsInstance(ET.Element, type)
   2129         self.assertIsInstance(ET.TreeBuilder, type)
   2130         self.assertIsInstance(ET.XMLParser, type)
   2131 
   2132     def test_Element_subclass_trivial(self):
   2133         class MyElement(ET.Element):
   2134             pass
   2135 
   2136         mye = MyElement('foo')
   2137         self.assertIsInstance(mye, ET.Element)
   2138         self.assertIsInstance(mye, MyElement)
   2139         self.assertEqual(mye.tag, 'foo')
   2140 
   2141         # test that attribute assignment works (issue 14849)
   2142         mye.text = "joe"
   2143         self.assertEqual(mye.text, "joe")
   2144 
   2145     def test_Element_subclass_constructor(self):
   2146         class MyElement(ET.Element):
   2147             def __init__(self, tag, attrib={}, **extra):
   2148                 super(MyElement, self).__init__(tag + '__', attrib, **extra)
   2149 
   2150         mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
   2151         self.assertEqual(mye.tag, 'foo__')
   2152         self.assertEqual(sorted(mye.items()),
   2153             [('a', 1), ('b', 2), ('c', 3), ('d', 4)])
   2154 
   2155     def test_Element_subclass_new_method(self):
   2156         class MyElement(ET.Element):
   2157             def newmethod(self):
   2158                 return self.tag
   2159 
   2160         mye = MyElement('joe')
   2161         self.assertEqual(mye.newmethod(), 'joe')
   2162 
   2163     def test_Element_subclass_find(self):
   2164         class MyElement(ET.Element):
   2165             pass
   2166 
   2167         e = ET.Element('foo')
   2168         e.text = 'text'
   2169         sub = MyElement('bar')
   2170         sub.text = 'subtext'
   2171         e.append(sub)
   2172         self.assertEqual(e.findtext('bar'), 'subtext')
   2173         self.assertEqual(e.find('bar').tag, 'bar')
   2174         found = list(e.findall('bar'))
   2175         self.assertEqual(len(found), 1, found)
   2176         self.assertEqual(found[0].tag, 'bar')
   2177 
   2178 
   2179 class ElementFindTest(unittest.TestCase):
   2180     def test_find_simple(self):
   2181         e = ET.XML(SAMPLE_XML)
   2182         self.assertEqual(e.find('tag').tag, 'tag')
   2183         self.assertEqual(e.find('section/tag').tag, 'tag')
   2184         self.assertEqual(e.find('./tag').tag, 'tag')
   2185 
   2186         e[2] = ET.XML(SAMPLE_SECTION)
   2187         self.assertEqual(e.find('section/nexttag').tag, 'nexttag')
   2188 
   2189         self.assertEqual(e.findtext('./tag'), 'text')
   2190         self.assertEqual(e.findtext('section/tag'), 'subtext')
   2191 
   2192         # section/nexttag is found but has no text
   2193         self.assertEqual(e.findtext('section/nexttag'), '')
   2194         self.assertEqual(e.findtext('section/nexttag', 'default'), '')
   2195 
   2196         # tog doesn't exist and 'default' kicks in
   2197         self.assertIsNone(e.findtext('tog'))
   2198         self.assertEqual(e.findtext('tog', 'default'), 'default')
   2199 
   2200         # Issue #16922
   2201         self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')
   2202 
   2203     def test_find_xpath(self):
   2204         LINEAR_XML = '''
   2205         <body>
   2206             <tag class='a'/>
   2207             <tag class='b'/>
   2208             <tag class='c'/>
   2209             <tag class='d'/>
   2210         </body>'''
   2211         e = ET.XML(LINEAR_XML)
   2212 
   2213         # Test for numeric indexing and last()
   2214         self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
   2215         self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
   2216         self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
   2217         self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
   2218         self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')
   2219 
   2220         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
   2221         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
   2222         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
   2223         self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
   2224 
   2225     def test_findall(self):
   2226         e = ET.XML(SAMPLE_XML)
   2227         e[2] = ET.XML(SAMPLE_SECTION)
   2228         self.assertEqual(summarize_list(e.findall('.')), ['body'])
   2229         self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
   2230         self.assertEqual(summarize_list(e.findall('tog')), [])
   2231         self.assertEqual(summarize_list(e.findall('tog/foo')), [])
   2232         self.assertEqual(summarize_list(e.findall('*')),
   2233             ['tag', 'tag', 'section'])
   2234         self.assertEqual(summarize_list(e.findall('.//tag')),
   2235             ['tag'] * 4)
   2236         self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
   2237         self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
   2238         self.assertEqual(summarize_list(e.findall('section/*')),
   2239             ['tag', 'nexttag', 'nextsection'])
   2240         self.assertEqual(summarize_list(e.findall('section//*')),
   2241             ['tag', 'nexttag', 'nextsection', 'tag'])
   2242         self.assertEqual(summarize_list(e.findall('section/.//*')),
   2243             ['tag', 'nexttag', 'nextsection', 'tag'])
   2244         self.assertEqual(summarize_list(e.findall('*/*')),
   2245             ['tag', 'nexttag', 'nextsection'])
   2246         self.assertEqual(summarize_list(e.findall('*//*')),
   2247             ['tag', 'nexttag', 'nextsection', 'tag'])
   2248         self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
   2249         self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
   2250         self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
   2251         self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)
   2252 
   2253         self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
   2254             ['tag'] * 3)
   2255         self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
   2256             ['tag'])
   2257         self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
   2258             ['tag'] * 2)
   2259         self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
   2260             ['tag'])
   2261         self.assertEqual(summarize_list(e.findall('.//section[tag]')),
   2262             ['section'])
   2263         self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
   2264         self.assertEqual(summarize_list(e.findall('../tag')), [])
   2265         self.assertEqual(summarize_list(e.findall('section/../tag')),
   2266             ['tag'] * 2)
   2267         self.assertEqual(e.findall('section//'), e.findall('section//*'))
   2268 
   2269         self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
   2270             ['section'])
   2271         self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
   2272             ['section'])
   2273         self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
   2274             ['section'])
   2275         self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
   2276             ['section'])
   2277         self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
   2278             ['section'])
   2279 
   2280         self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
   2281                          ['tag'])
   2282         self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
   2283                          ['tag'])
   2284         self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
   2285                          ['tag'])
   2286         self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
   2287                          ['tag'])
   2288         self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
   2289                          ['tag'])
   2290         self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
   2291                          [])
   2292         self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
   2293                          [])
   2294 
   2295         # duplicate section => 2x tag matches
   2296         e[1] = e[2]
   2297         self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
   2298                          ['section', 'section'])
   2299         self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
   2300                          ['tag', 'tag'])
   2301 
   2302     def test_test_find_with_ns(self):
   2303         e = ET.XML(SAMPLE_XML_NS)
   2304         self.assertEqual(summarize_list(e.findall('tag')), [])
   2305         self.assertEqual(
   2306             summarize_list(e.findall("{http://effbot.org/ns}tag")),
   2307             ['{http://effbot.org/ns}tag'] * 2)
   2308         self.assertEqual(
   2309             summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
   2310             ['{http://effbot.org/ns}tag'] * 3)
   2311 
   2312     def test_findall_different_nsmaps(self):
   2313         root = ET.XML('''
   2314             <a xmlns:x="X" xmlns:y="Y">
   2315                 <x:b><c/></x:b>
   2316                 <b/>
   2317                 <c><x:b/><b/></c><y:b/>
   2318             </a>''')
   2319         nsmap = {'xx': 'X'}
   2320         self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
   2321         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
   2322         nsmap = {'xx': 'Y'}
   2323         self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
   2324         self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
   2325 
   2326     def test_bad_find(self):
   2327         e = ET.XML(SAMPLE_XML)
   2328         with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
   2329             e.findall('/tag')
   2330 
   2331     def test_find_through_ElementTree(self):
   2332         e = ET.XML(SAMPLE_XML)
   2333         self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
   2334         self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
   2335         self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
   2336             ['tag'] * 2)
   2337         # this produces a warning
   2338         msg = ("This search is broken in 1.3 and earlier, and will be fixed "
   2339                "in a future version.  If you rely on the current behaviour, "
   2340                "change it to '.+'")
   2341         with self.assertWarnsRegex(FutureWarning, msg):
   2342             it = ET.ElementTree(e).findall('//tag')
   2343         self.assertEqual(summarize_list(it), ['tag'] * 3)
   2344 
   2345 
   2346 class ElementIterTest(unittest.TestCase):
   2347     def _ilist(self, elem, tag=None):
   2348         return summarize_list(elem.iter(tag))
   2349 
   2350     def test_basic(self):
   2351         doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
   2352         self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
   2353         self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
   2354         self.assertEqual(next(doc.iter()).tag, 'html')
   2355         self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
   2356         self.assertEqual(''.join(doc.find('body').itertext()),
   2357             'this is a paragraph.')
   2358         self.assertEqual(next(doc.itertext()), 'this is a ')
   2359 
   2360         # iterparse should return an iterator
   2361         sourcefile = serialize(doc, to_string=False)
   2362         self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
   2363 
   2364         # With an explicit parser too (issue #9708)
   2365         sourcefile = serialize(doc, to_string=False)
   2366         parser = ET.XMLParser(target=ET.TreeBuilder())
   2367         self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
   2368                          'end')
   2369 
   2370         tree = ET.ElementTree(None)
   2371         self.assertRaises(AttributeError, tree.iter)
   2372 
   2373         # Issue #16913
   2374         doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
   2375         self.assertEqual(''.join(doc.itertext()), 'a&b&c&')
   2376 
   2377     def test_corners(self):
   2378         # single root, no subelements
   2379         a = ET.Element('a')
   2380         self.assertEqual(self._ilist(a), ['a'])
   2381 
   2382         # one child
   2383         b = ET.SubElement(a, 'b')
   2384         self.assertEqual(self._ilist(a), ['a', 'b'])
   2385 
   2386         # one child and one grandchild
   2387         c = ET.SubElement(b, 'c')
   2388         self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
   2389 
   2390         # two children, only first with grandchild
   2391         d = ET.SubElement(a, 'd')
   2392         self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
   2393 
   2394         # replace first child by second
   2395         a[0] = a[1]
   2396         del a[1]
   2397         self.assertEqual(self._ilist(a), ['a', 'd'])
   2398 
   2399     def test_iter_by_tag(self):
   2400         doc = ET.XML('''
   2401             <document>
   2402                 <house>
   2403                     <room>bedroom1</room>
   2404                     <room>bedroom2</room>
   2405                 </house>
   2406                 <shed>nothing here
   2407                 </shed>
   2408                 <house>
   2409                     <room>bedroom8</room>
   2410                 </house>
   2411             </document>''')
   2412 
   2413         self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
   2414         self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
   2415 
   2416         # test that iter also accepts 'tag' as a keyword arg
   2417         self.assertEqual(
   2418             summarize_list(doc.iter(tag='room')),
   2419             ['room'] * 3)
   2420 
   2421         # make sure both tag=None and tag='*' return all tags
   2422         all_tags = ['document', 'house', 'room', 'room',
   2423                     'shed', 'house', 'room']
   2424         self.assertEqual(summarize_list(doc.iter()), all_tags)
   2425         self.assertEqual(self._ilist(doc), all_tags)
   2426         self.assertEqual(self._ilist(doc, '*'), all_tags)
   2427 
   2428     # Element.getiterator() is deprecated.
   2429     @checkwarnings(("This method will be removed in future versions.  "
   2430                     "Use .+ instead.", PendingDeprecationWarning))
   2431     def test_getiterator(self):
   2432         doc = ET.XML('''
   2433             <document>
   2434                 <house>
   2435                     <room>bedroom1</room>
   2436                     <room>bedroom2</room>
   2437                 </house>
   2438                 <shed>nothing here
   2439                 </shed>
   2440                 <house>
   2441                     <room>bedroom8</room>
   2442                 </house>
   2443             </document>''')
   2444 
   2445         self.assertEqual(summarize_list(doc.getiterator('room')),
   2446                          ['room'] * 3)
   2447         self.assertEqual(summarize_list(doc.getiterator('house')),
   2448                          ['house'] * 2)
   2449 
   2450         # test that getiterator also accepts 'tag' as a keyword arg
   2451         self.assertEqual(
   2452             summarize_list(doc.getiterator(tag='room')),
   2453             ['room'] * 3)
   2454 
   2455         # make sure both tag=None and tag='*' return all tags
   2456         all_tags = ['document', 'house', 'room', 'room',
   2457                     'shed', 'house', 'room']
   2458         self.assertEqual(summarize_list(doc.getiterator()), all_tags)
   2459         self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
   2460         self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)
   2461 
   2462     def test_copy(self):
   2463         a = ET.Element('a')
   2464         it = a.iter()
   2465         with self.assertRaises(TypeError):
   2466             copy.copy(it)
   2467 
   2468     def test_pickle(self):
   2469         a = ET.Element('a')
   2470         it = a.iter()
   2471         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
   2472             with self.assertRaises((TypeError, pickle.PicklingError)):
   2473                 pickle.dumps(it, proto)
   2474 
   2475 
   2476 class TreeBuilderTest(unittest.TestCase):
   2477     sample1 = ('<!DOCTYPE html PUBLIC'
   2478         ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
   2479         ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
   2480         '<html>text<div>subtext</div>tail</html>')
   2481 
   2482     sample2 = '''<toplevel>sometext</toplevel>'''
   2483 
   2484     def _check_sample1_element(self, e):
   2485         self.assertEqual(e.tag, 'html')
   2486         self.assertEqual(e.text, 'text')
   2487         self.assertEqual(e.tail, None)
   2488         self.assertEqual(e.attrib, {})
   2489         children = list(e)
   2490         self.assertEqual(len(children), 1)
   2491         child = children[0]
   2492         self.assertEqual(child.tag, 'div')
   2493         self.assertEqual(child.text, 'subtext')
   2494         self.assertEqual(child.tail, 'tail')
   2495         self.assertEqual(child.attrib, {})
   2496 
   2497     def test_dummy_builder(self):
   2498         class BaseDummyBuilder:
   2499             def close(self):
   2500                 return 42
   2501 
   2502         class DummyBuilder(BaseDummyBuilder):
   2503             data = start = end = lambda *a: None
   2504 
   2505         parser = ET.XMLParser(target=DummyBuilder())
   2506         parser.feed(self.sample1)
   2507         self.assertEqual(parser.close(), 42)
   2508 
   2509         parser = ET.XMLParser(target=BaseDummyBuilder())
   2510         parser.feed(self.sample1)
   2511         self.assertEqual(parser.close(), 42)
   2512 
   2513         parser = ET.XMLParser(target=object())
   2514         parser.feed(self.sample1)
   2515         self.assertIsNone(parser.close())
   2516 
   2517     def test_treebuilder_elementfactory_none(self):
   2518         parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
   2519         parser.feed(self.sample1)
   2520         e = parser.close()
   2521         self._check_sample1_element(e)
   2522 
   2523     def test_subclass(self):
   2524         class MyTreeBuilder(ET.TreeBuilder):
   2525             def foobar(self, x):
   2526                 return x * 2
   2527 
   2528         tb = MyTreeBuilder()
   2529         self.assertEqual(tb.foobar(10), 20)
   2530 
   2531         parser = ET.XMLParser(target=tb)
   2532         parser.feed(self.sample1)
   2533 
   2534         e = parser.close()
   2535         self._check_sample1_element(e)
   2536 
   2537     def test_element_factory(self):
   2538         lst = []
   2539         def myfactory(tag, attrib):
   2540             nonlocal lst
   2541             lst.append(tag)
   2542             return ET.Element(tag, attrib)
   2543 
   2544         tb = ET.TreeBuilder(element_factory=myfactory)
   2545         parser = ET.XMLParser(target=tb)
   2546         parser.feed(self.sample2)
   2547         parser.close()
   2548 
   2549         self.assertEqual(lst, ['toplevel'])
   2550 
   2551     def _check_element_factory_class(self, cls):
   2552         tb = ET.TreeBuilder(element_factory=cls)
   2553 
   2554         parser = ET.XMLParser(target=tb)
   2555         parser.feed(self.sample1)
   2556         e = parser.close()
   2557         self.assertIsInstance(e, cls)
   2558         self._check_sample1_element(e)
   2559 
   2560     def test_element_factory_subclass(self):
   2561         class MyElement(ET.Element):
   2562             pass
   2563         self._check_element_factory_class(MyElement)
   2564 
   2565     def test_element_factory_pure_python_subclass(self):
   2566         # Mimick SimpleTAL's behaviour (issue #16089): both versions of
   2567         # TreeBuilder should be able to cope with a subclass of the
   2568         # pure Python Element class.
   2569         base = ET._Element_Py
   2570         # Not from a C extension
   2571         self.assertEqual(base.__module__, 'xml.etree.ElementTree')
   2572         # Force some multiple inheritance with a C class to make things
   2573         # more interesting.
   2574         class MyElement(base, ValueError):
   2575             pass
   2576         self._check_element_factory_class(MyElement)
   2577 
   2578     def test_doctype(self):
   2579         class DoctypeParser:
   2580             _doctype = None
   2581 
   2582             def doctype(self, name, pubid, system):
   2583                 self._doctype = (name, pubid, system)
   2584 
   2585             def close(self):
   2586                 return self._doctype
   2587 
   2588         parser = ET.XMLParser(target=DoctypeParser())
   2589         parser.feed(self.sample1)
   2590 
   2591         self.assertEqual(parser.close(),
   2592             ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
   2593              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
   2594 
   2595     def test_builder_lookup_errors(self):
   2596         class RaisingBuilder:
   2597             def __init__(self, raise_in=None, what=ValueError):
   2598                 self.raise_in = raise_in
   2599                 self.what = what
   2600 
   2601             def __getattr__(self, name):
   2602                 if name == self.raise_in:
   2603                     raise self.what(self.raise_in)
   2604                 def handle(*args):
   2605                     pass
   2606                 return handle
   2607 
   2608         ET.XMLParser(target=RaisingBuilder())
   2609         # cET also checks for 'close' and 'doctype', PyET does it only at need
   2610         for event in ('start', 'data', 'end', 'comment', 'pi'):
   2611             with self.assertRaisesRegex(ValueError, event):
   2612                 ET.XMLParser(target=RaisingBuilder(event))
   2613 
   2614         ET.XMLParser(target=RaisingBuilder(what=AttributeError))
   2615         for event in ('start', 'data', 'end', 'comment', 'pi'):
   2616             parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
   2617             parser.feed(self.sample1)
   2618             self.assertIsNone(parser.close())
   2619 
   2620 
   2621 class XMLParserTest(unittest.TestCase):
   2622     sample1 = b'<file><line>22</line></file>'
   2623     sample2 = (b'<!DOCTYPE html PUBLIC'
   2624         b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
   2625         b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
   2626         b'<html>text</html>')
   2627     sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
   2628         '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')
   2629 
   2630     def _check_sample_element(self, e):
   2631         self.assertEqual(e.tag, 'file')
   2632         self.assertEqual(e[0].tag, 'line')
   2633         self.assertEqual(e[0].text, '22')
   2634 
   2635     def test_constructor_args(self):
   2636         # Positional args. The first (html) is not supported, but should be
   2637         # nevertheless correctly accepted.
   2638         with self.assertWarnsRegex(DeprecationWarning, r'\bhtml\b'):
   2639             parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
   2640         parser.feed(self.sample1)
   2641         self._check_sample_element(parser.close())
   2642 
   2643         # Now as keyword args.
   2644         parser2 = ET.XMLParser(encoding='utf-8',
   2645                                target=ET.TreeBuilder())
   2646         parser2.feed(self.sample1)
   2647         self._check_sample_element(parser2.close())
   2648 
   2649     def test_subclass(self):
   2650         class MyParser(ET.XMLParser):
   2651             pass
   2652         parser = MyParser()
   2653         parser.feed(self.sample1)
   2654         self._check_sample_element(parser.close())
   2655 
   2656     def test_doctype_warning(self):
   2657         parser = ET.XMLParser()
   2658         with self.assertWarns(DeprecationWarning):
   2659             parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
   2660                 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
   2661         parser.feed('<html/>')
   2662         parser.close()
   2663 
   2664         with warnings.catch_warnings():
   2665             warnings.simplefilter('error', DeprecationWarning)
   2666             parser = ET.XMLParser()
   2667             parser.feed(self.sample2)
   2668             parser.close()
   2669 
   2670     def test_subclass_doctype(self):
   2671         _doctype = None
   2672         class MyParserWithDoctype(ET.XMLParser):
   2673             def doctype(self, name, pubid, system):
   2674                 nonlocal _doctype
   2675                 _doctype = (name, pubid, system)
   2676 
   2677         parser = MyParserWithDoctype()
   2678         with self.assertWarns(DeprecationWarning):
   2679             parser.feed(self.sample2)
   2680         parser.close()
   2681         self.assertEqual(_doctype,
   2682             ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
   2683              'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
   2684 
   2685         _doctype = _doctype2 = None
   2686         with warnings.catch_warnings():
   2687             warnings.simplefilter('error', DeprecationWarning)
   2688             class DoctypeParser:
   2689                 def doctype(self, name, pubid, system):
   2690                     nonlocal _doctype2
   2691                     _doctype2 = (name, pubid, system)
   2692 
   2693             parser = MyParserWithDoctype(target=DoctypeParser())
   2694             parser.feed(self.sample2)
   2695             parser.close()
   2696             self.assertIsNone(_doctype)
   2697             self.assertEqual(_doctype2,
   2698                 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
   2699                  'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
   2700 
   2701     def test_inherited_doctype(self):
   2702         '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
   2703         with warnings.catch_warnings():
   2704             warnings.simplefilter('error', DeprecationWarning)
   2705             class MyParserWithoutDoctype(ET.XMLParser):
   2706                 pass
   2707             parser = MyParserWithoutDoctype()
   2708             parser.feed(self.sample2)
   2709             parser.close()
   2710 
   2711     def test_parse_string(self):
   2712         parser = ET.XMLParser(target=ET.TreeBuilder())
   2713         parser.feed(self.sample3)
   2714         e = parser.close()
   2715         self.assertEqual(e.tag, 'money')
   2716         self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
   2717         self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
   2718 
   2719 
   2720 class NamespaceParseTest(unittest.TestCase):
   2721     def test_find_with_namespace(self):
   2722         nsmap = {'h': 'hello', 'f': 'foo'}
   2723         doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)
   2724 
   2725         self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1)
   2726         self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2)
   2727         self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
   2728 
   2729 
   2730 class ElementSlicingTest(unittest.TestCase):
   2731     def _elem_tags(self, elemlist):
   2732         return [e.tag for e in elemlist]
   2733 
   2734     def _subelem_tags(self, elem):
   2735         return self._elem_tags(list(elem))
   2736 
   2737     def _make_elem_with_children(self, numchildren):
   2738         """Create an Element with a tag 'a', with the given amount of children
   2739            named 'a0', 'a1' ... and so on.
   2740 
   2741         """
   2742         e = ET.Element('a')
   2743         for i in range(numchildren):
   2744             ET.SubElement(e, 'a%s' % i)
   2745         return e
   2746 
   2747     def test_getslice_single_index(self):
   2748         e = self._make_elem_with_children(10)
   2749 
   2750         self.assertEqual(e[1].tag, 'a1')
   2751         self.assertEqual(e[-2].tag, 'a8')
   2752 
   2753         self.assertRaises(IndexError, lambda: e[12])
   2754         self.assertRaises(IndexError, lambda: e[-12])
   2755 
   2756     def test_getslice_range(self):
   2757         e = self._make_elem_with_children(6)
   2758 
   2759         self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5'])
   2760         self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5'])
   2761         self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5'])
   2762         self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4'])
   2763         self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4'])
   2764         self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1'])
   2765 
   2766     def test_getslice_steps(self):
   2767         e = self._make_elem_with_children(10)
   2768 
   2769         self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9'])
   2770         self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
   2771         self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8'])
   2772         self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9'])
   2773         self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3'])
   2774         self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3'])
   2775 
   2776     def test_getslice_negative_steps(self):
   2777         e = self._make_elem_with_children(4)
   2778 
   2779         self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
   2780         self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1'])
   2781         self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3'])
   2782         self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3'])
   2783         self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3'])
   2784 
   2785     def test_delslice(self):
   2786         e = self._make_elem_with_children(4)
   2787         del e[0:2]
   2788         self.assertEqual(self._subelem_tags(e), ['a2', 'a3'])
   2789 
   2790         e = self._make_elem_with_children(4)
   2791         del e[0:]
   2792         self.assertEqual(self._subelem_tags(e), [])
   2793 
   2794         e = self._make_elem_with_children(4)
   2795         del e[::-1]
   2796         self.assertEqual(self._subelem_tags(e), [])
   2797 
   2798         e = self._make_elem_with_children(4)
   2799         del e[::-2]
   2800         self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
   2801 
   2802         e = self._make_elem_with_children(4)
   2803         del e[1::2]
   2804         self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
   2805 
   2806         e = self._make_elem_with_children(2)
   2807         del e[::2]
   2808         self.assertEqual(self._subelem_tags(e), ['a1'])
   2809 
   2810     def test_setslice_single_index(self):
   2811         e = self._make_elem_with_children(4)
   2812         e[1] = ET.Element('b')
   2813         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
   2814 
   2815         e[-2] = ET.Element('c')
   2816         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
   2817 
   2818         with self.assertRaises(IndexError):
   2819             e[5] = ET.Element('d')
   2820         with self.assertRaises(IndexError):
   2821             e[-5] = ET.Element('d')
   2822         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
   2823 
   2824     def test_setslice_range(self):
   2825         e = self._make_elem_with_children(4)
   2826         e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
   2827         self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])
   2828 
   2829         e = self._make_elem_with_children(4)
   2830         e[1:3] = [ET.Element('b')]
   2831         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])
   2832 
   2833         e = self._make_elem_with_children(4)
   2834         e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
   2835         self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])
   2836 
   2837     def test_setslice_steps(self):
   2838         e = self._make_elem_with_children(6)
   2839         e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
   2840         self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])
   2841 
   2842         e = self._make_elem_with_children(6)
   2843         with self.assertRaises(ValueError):
   2844             e[1:5:2] = [ET.Element('b')]
   2845         with self.assertRaises(ValueError):
   2846             e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
   2847         with self.assertRaises(ValueError):
   2848             e[1:5:2] = []
   2849         self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])
   2850 
   2851         e = self._make_elem_with_children(4)
   2852         e[1::sys.maxsize] = [ET.Element('b')]
   2853         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
   2854         e[1::sys.maxsize<<64] = [ET.Element('c')]
   2855         self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
   2856 
   2857     def test_setslice_negative_steps(self):
   2858         e = self._make_elem_with_children(4)
   2859         e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
   2860         self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])
   2861 
   2862         e = self._make_elem_with_children(4)
   2863         with self.assertRaises(ValueError):
   2864             e[2:0:-1] = [ET.Element('b')]
   2865         with self.assertRaises(ValueError):
   2866             e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
   2867         with self.assertRaises(ValueError):
   2868             e[2:0:-1] = []
   2869         self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])
   2870 
   2871         e = self._make_elem_with_children(4)
   2872         e[1::-sys.maxsize] = [ET.Element('b')]
   2873         self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
   2874         e[1::-sys.maxsize-1] = [ET.Element('c')]
   2875         self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
   2876         e[1::-sys.maxsize<<64] = [ET.Element('d')]
   2877         self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
   2878 
   2879 
   2880 class IOTest(unittest.TestCase):
   2881     def test_encoding(self):
   2882         # Test encoding issues.
   2883         elem = ET.Element("tag")
   2884         elem.text = "abc"
   2885         self.assertEqual(serialize(elem), '<tag>abc</tag>')
   2886         for enc in ("utf-8", "us-ascii"):
   2887             with self.subTest(enc):
   2888                 self.assertEqual(serialize(elem, encoding=enc),
   2889                         b'<tag>abc</tag>')
   2890                 self.assertEqual(serialize(elem, encoding=enc.upper()),
   2891                         b'<tag>abc</tag>')
   2892         for enc in ("iso-8859-1", "utf-16", "utf-32"):
   2893             with self.subTest(enc):
   2894                 self.assertEqual(serialize(elem, encoding=enc),
   2895                         ("<?xml version='1.0' encoding='%s'?>\n"
   2896                          "<tag>abc</tag>" % enc).encode(enc))
   2897                 upper = enc.upper()
   2898                 self.assertEqual(serialize(elem, encoding=upper),
   2899                         ("<?xml version='1.0' encoding='%s'?>\n"
   2900                          "<tag>abc</tag>" % upper).encode(enc))
   2901 
   2902         elem = ET.Element("tag")
   2903         elem.text = "<&\"\'>"
   2904         self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
   2905         self.assertEqual(serialize(elem, encoding="utf-8"),
   2906                 b'<tag>&lt;&amp;"\'&gt;</tag>')
   2907         self.assertEqual(serialize(elem, encoding="us-ascii"),
   2908                 b'<tag>&lt;&amp;"\'&gt;</tag>')
   2909         for enc in ("iso-8859-1", "utf-16", "utf-32"):
   2910             self.assertEqual(serialize(elem, encoding=enc),
   2911                     ("<?xml version='1.0' encoding='%s'?>\n"
   2912                      "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))
   2913 
   2914         elem = ET.Element("tag")
   2915         elem.attrib["key"] = "<&\"\'>"
   2916         self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
   2917         self.assertEqual(serialize(elem, encoding="utf-8"),
   2918                 b'<tag key="&lt;&amp;&quot;\'&gt;" />')
   2919         self.assertEqual(serialize(elem, encoding="us-ascii"),
   2920                 b'<tag key="&lt;&amp;&quot;\'&gt;" />')
   2921         for enc in ("iso-8859-1", "utf-16", "utf-32"):
   2922             self.assertEqual(serialize(elem, encoding=enc),
   2923                     ("<?xml version='1.0' encoding='%s'?>\n"
   2924                      "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))
   2925 
   2926         elem = ET.Element("tag")
   2927         elem.text = '\xe5\xf6\xf6<>'
   2928         self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
   2929         self.assertEqual(serialize(elem, encoding="utf-8"),
   2930                 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
   2931         self.assertEqual(serialize(elem, encoding="us-ascii"),
   2932                 b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
   2933         for enc in ("iso-8859-1", "utf-16", "utf-32"):
   2934             self.assertEqual(serialize(elem, encoding=enc),
   2935                     ("<?xml version='1.0' encoding='%s'?>\n"
   2936                      "<tag>&lt;&gt;</tag>" % enc).encode(enc))
   2937 
   2938         elem = ET.Element("tag")
   2939         elem.attrib["key"] = '\xe5\xf6\xf6<>'
   2940         self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
   2941         self.assertEqual(serialize(elem, encoding="utf-8"),
   2942                 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
   2943         self.assertEqual(serialize(elem, encoding="us-ascii"),
   2944                 b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
   2945         for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
   2946             self.assertEqual(serialize(elem, encoding=enc),
   2947                     ("<?xml version='1.0' encoding='%s'?>\n"
   2948                      "<tag key=\"&lt;&gt;\" />" % enc).encode(enc))
   2949 
   2950     def test_write_to_filename(self):
   2951         self.addCleanup(support.unlink, TESTFN)
   2952         tree = ET.ElementTree(ET.XML('''<site />'''))
   2953         tree.write(TESTFN)
   2954         with open(TESTFN, 'rb') as f:
   2955             self.assertEqual(f.read(), b'''<site />''')
   2956 
   2957     def test_write_to_text_file(self):
   2958         self.addCleanup(support.unlink, TESTFN)
   2959         tree = ET.ElementTree(ET.XML('''<site />'''))
   2960         with open(TESTFN, 'w', encoding='utf-8') as f:
   2961             tree.write(f, encoding='unicode')
   2962             self.assertFalse(f.closed)
   2963         with open(TESTFN, 'rb') as f:
   2964             self.assertEqual(f.read(), b'''<site />''')
   2965 
   2966     def test_write_to_binary_file(self):
   2967         self.addCleanup(support.unlink, TESTFN)
   2968         tree = ET.ElementTree(ET.XML('''<site />'''))
   2969         with open(TESTFN, 'wb') as f:
   2970             tree.write(f)
   2971             self.assertFalse(f.closed)
   2972         with open(TESTFN, 'rb') as f:
   2973             self.assertEqual(f.read(), b'''<site />''')
   2974 
   2975     def test_write_to_binary_file_with_bom(self):
   2976         self.addCleanup(support.unlink, TESTFN)
   2977         tree = ET.ElementTree(ET.XML('''<site />'''))
   2978         # test BOM writing to buffered file
   2979         with open(TESTFN, 'wb') as f:
   2980             tree.write(f, encoding='utf-16')
   2981             self.assertFalse(f.closed)
   2982         with open(TESTFN, 'rb') as f:
   2983             self.assertEqual(f.read(),
   2984                     '''<?xml version='1.0' encoding='utf-16'?>\n'''
   2985                     '''<site />'''.encode("utf-16"))
   2986         # test BOM writing to non-buffered file
   2987         with open(TESTFN, 'wb', buffering=0) as f:
   2988             tree.write(f, encoding='utf-16')
   2989             self.assertFalse(f.closed)
   2990         with open(TESTFN, 'rb') as f:
   2991             self.assertEqual(f.read(),
   2992                     '''<?xml version='1.0' encoding='utf-16'?>\n'''
   2993                     '''<site />'''.encode("utf-16"))
   2994 
   2995     def test_read_from_stringio(self):
   2996         tree = ET.ElementTree()
   2997         stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
   2998         tree.parse(stream)
   2999         self.assertEqual(tree.getroot().tag, 'site')
   3000 
   3001     def test_write_to_stringio(self):
   3002         tree = ET.ElementTree(ET.XML('''<site />'''))
   3003         stream = io.StringIO()
   3004         tree.write(stream, encoding='unicode')
   3005         self.assertEqual(stream.getvalue(), '''<site />''')
   3006 
   3007     def test_read_from_bytesio(self):
   3008         tree = ET.ElementTree()
   3009         raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
   3010         tree.parse(raw)
   3011         self.assertEqual(tree.getroot().tag, 'site')
   3012 
   3013     def test_write_to_bytesio(self):
   3014         tree = ET.ElementTree(ET.XML('''<site />'''))
   3015         raw = io.BytesIO()
   3016         tree.write(raw)
   3017         self.assertEqual(raw.getvalue(), b'''<site />''')
   3018 
   3019     class dummy:
   3020         pass
   3021 
   3022     def test_read_from_user_text_reader(self):
   3023         stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
   3024         reader = self.dummy()
   3025         reader.read = stream.read
   3026         tree = ET.ElementTree()
   3027         tree.parse(reader)
   3028         self.assertEqual(tree.getroot().tag, 'site')
   3029 
   3030     def test_write_to_user_text_writer(self):
   3031         tree = ET.ElementTree(ET.XML('''<site />'''))
   3032         stream = io.StringIO()
   3033         writer = self.dummy()
   3034         writer.write = stream.write
   3035         tree.write(writer, encoding='unicode')
   3036         self.assertEqual(stream.getvalue(), '''<site />''')
   3037 
   3038     def test_read_from_user_binary_reader(self):
   3039         raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
   3040         reader = self.dummy()
   3041         reader.read = raw.read
   3042         tree = ET.ElementTree()
   3043         tree.parse(reader)
   3044         self.assertEqual(tree.getroot().tag, 'site')
   3045         tree = ET.ElementTree()
   3046 
   3047     def test_write_to_user_binary_writer(self):
   3048         tree = ET.ElementTree(ET.XML('''<site />'''))
   3049         raw = io.BytesIO()
   3050         writer = self.dummy()
   3051         writer.write = raw.write
   3052         tree.write(writer)
   3053         self.assertEqual(raw.getvalue(), b'''<site />''')
   3054 
   3055     def test_write_to_user_binary_writer_with_bom(self):
   3056         tree = ET.ElementTree(ET.XML('''<site />'''))
   3057         raw = io.BytesIO()
   3058         writer = self.dummy()
   3059         writer.write = raw.write
   3060         writer.seekable = lambda: True
   3061         writer.tell = raw.tell
   3062         tree.write(writer, encoding="utf-16")
   3063         self.assertEqual(raw.getvalue(),
   3064                 '''<?xml version='1.0' encoding='utf-16'?>\n'''
   3065                 '''<site />'''.encode("utf-16"))
   3066 
   3067     def test_tostringlist_invariant(self):
   3068         root = ET.fromstring('<tag>foo</tag>')
   3069         self.assertEqual(
   3070             ET.tostring(root, 'unicode'),
   3071             ''.join(ET.tostringlist(root, 'unicode')))
   3072         self.assertEqual(
   3073             ET.tostring(root, 'utf-16'),
   3074             b''.join(ET.tostringlist(root, 'utf-16')))
   3075 
   3076     def test_short_empty_elements(self):
   3077         root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
   3078         self.assertEqual(
   3079             ET.tostring(root, 'unicode'),
   3080             '<tag>a<x />b<y />c</tag>')
   3081         self.assertEqual(
   3082             ET.tostring(root, 'unicode', short_empty_elements=True),
   3083             '<tag>a<x />b<y />c</tag>')
   3084         self.assertEqual(
   3085             ET.tostring(root, 'unicode', short_empty_elements=False),
   3086             '<tag>a<x></x>b<y></y>c</tag>')
   3087 
   3088 
   3089 class ParseErrorTest(unittest.TestCase):
   3090     def test_subclass(self):
   3091         self.assertIsInstance(ET.ParseError(), SyntaxError)
   3092 
   3093     def _get_error(self, s):
   3094         try:
   3095             ET.fromstring(s)
   3096         except ET.ParseError as e:
   3097             return e
   3098 
   3099     def test_error_position(self):
   3100         self.assertEqual(self._get_error('foo').position, (1, 0))
   3101         self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
   3102         self.assertEqual(self._get_error('foobar<').position, (1, 6))
   3103 
   3104     def test_error_code(self):
   3105         import xml.parsers.expat.errors as ERRORS
   3106         self.assertEqual(self._get_error('foo').code,
   3107                 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
   3108 
   3109 
   3110 class KeywordArgsTest(unittest.TestCase):
   3111     # Test various issues with keyword arguments passed to ET.Element
   3112     # constructor and methods
   3113     def test_issue14818(self):
   3114         x = ET.XML("<a>foo</a>")
   3115         self.assertEqual(x.find('a', None),
   3116                          x.find(path='a', namespaces=None))
   3117         self.assertEqual(x.findtext('a', None, None),
   3118                          x.findtext(path='a', default=None, namespaces=None))
   3119         self.assertEqual(x.findall('a', None),
   3120                          x.findall(path='a', namespaces=None))
   3121         self.assertEqual(list(x.iterfind('a', None)),
   3122                          list(x.iterfind(path='a', namespaces=None)))
   3123 
   3124         self.assertEqual(ET.Element('a').attrib, {})
   3125         elements = [
   3126             ET.Element('a', dict(href="#", id="foo")),
   3127             ET.Element('a', attrib=dict(href="#", id="foo")),
   3128             ET.Element('a', dict(href="#"), id="foo"),
   3129             ET.Element('a', href="#", id="foo"),
   3130             ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
   3131         ]
   3132         for e in elements:
   3133             self.assertEqual(e.tag, 'a')
   3134             self.assertEqual(e.attrib, dict(href="#", id="foo"))
   3135 
   3136         e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
   3137         self.assertEqual(e2.attrib['key1'], 'value1')
   3138 
   3139         with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
   3140             ET.Element('a', "I'm not a dict")
   3141         with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
   3142             ET.Element('a', attrib="I'm not a dict")
   3143 
   3144 # --------------------------------------------------------------------
   3145 
   3146 class NoAcceleratorTest(unittest.TestCase):
   3147     def setUp(self):
   3148         if not pyET:
   3149             raise unittest.SkipTest('only for the Python version')
   3150 
   3151     # Test that the C accelerator was not imported for pyET
   3152     def test_correct_import_pyET(self):
   3153         # The type of methods defined in Python code is types.FunctionType,
   3154         # while the type of methods defined inside _elementtree is
   3155         # <class 'wrapper_descriptor'>
   3156         self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
   3157         self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
   3158 
   3159 # --------------------------------------------------------------------
   3160 
   3161 
   3162 def test_main(module=None):
   3163     # When invoked without a module, runs the Python ET tests by loading pyET.
   3164     # Otherwise, uses the given module as the ET.
   3165     global pyET
   3166     pyET = import_fresh_module('xml.etree.ElementTree',
   3167                                blocked=['_elementtree'])
   3168     if module is None:
   3169         module = pyET
   3170 
   3171     global ET
   3172     ET = module
   3173 
   3174     test_classes = [
   3175         ModuleTest,
   3176         ElementSlicingTest,
   3177         BasicElementTest,
   3178         BadElementTest,
   3179         BadElementPathTest,
   3180         ElementTreeTest,
   3181         IOTest,
   3182         ParseErrorTest,
   3183         XIncludeTest,
   3184         ElementTreeTypeTest,
   3185         ElementFindTest,
   3186         ElementIterTest,
   3187         TreeBuilderTest,
   3188         XMLParserTest,
   3189         XMLPullParserTest,
   3190         BugsTest,
   3191         ]
   3192 
   3193     # These tests will only run for the pure-Python version that doesn't import
   3194     # _elementtree. We can't use skipUnless here, because pyET is filled in only
   3195     # after the module is loaded.
   3196     if pyET is not ET:
   3197         test_classes.extend([
   3198             NoAcceleratorTest,
   3199             ])
   3200 
   3201     # Provide default namespace mapping and path cache.
   3202     from xml.etree import ElementPath
   3203     nsmap = ET.register_namespace._namespace_map
   3204     # Copy the default namespace mapping
   3205     nsmap_copy = nsmap.copy()
   3206     # Copy the path cache (should be empty)
   3207     path_cache = ElementPath._cache
   3208     ElementPath._cache = path_cache.copy()
   3209     try:
   3210         support.run_unittest(*test_classes)
   3211     finally:
   3212         from xml.etree import ElementPath
   3213         # Restore mapping and path cache
   3214         nsmap.clear()
   3215         nsmap.update(nsmap_copy)
   3216         ElementPath._cache = path_cache
   3217         # don't interfere with subsequent tests
   3218         ET = pyET = None
   3219 
   3220 
# Executed as a script: test_main() is called without a module, so the
# pure-Python implementation is the one being tested.
if __name__ == '__main__':
    test_main()
   3223