Home | History | Annotate | Download | only in test
      1 # XXX TypeErrors on calling handlers, or on bad return values from a
      2 # handler, are obscure and unhelpful.
      3 
      4 from io import BytesIO
      5 import os
      6 import sys
      7 import sysconfig
      8 import unittest
      9 import traceback
     10 
     11 from xml.parsers import expat
     12 from xml.parsers.expat import errors
     13 
     14 from test.support import sortdict
     15 
     16 
     17 class SetAttributeTest(unittest.TestCase):
     18     def setUp(self):
     19         self.parser = expat.ParserCreate(namespace_separator='!')
     20 
     21     def test_buffer_text(self):
     22         self.assertIs(self.parser.buffer_text, False)
     23         for x in 0, 1, 2, 0:
     24             self.parser.buffer_text = x
     25             self.assertIs(self.parser.buffer_text, bool(x))
     26 
     27     def test_namespace_prefixes(self):
     28         self.assertIs(self.parser.namespace_prefixes, False)
     29         for x in 0, 1, 2, 0:
     30             self.parser.namespace_prefixes = x
     31             self.assertIs(self.parser.namespace_prefixes, bool(x))
     32 
     33     def test_ordered_attributes(self):
     34         self.assertIs(self.parser.ordered_attributes, False)
     35         for x in 0, 1, 2, 0:
     36             self.parser.ordered_attributes = x
     37             self.assertIs(self.parser.ordered_attributes, bool(x))
     38 
     39     def test_specified_attributes(self):
     40         self.assertIs(self.parser.specified_attributes, False)
     41         for x in 0, 1, 2, 0:
     42             self.parser.specified_attributes = x
     43             self.assertIs(self.parser.specified_attributes, bool(x))
     44 
     45     def test_invalid_attributes(self):
     46         with self.assertRaises(AttributeError):
     47             self.parser.returns_unicode = 1
     48         with self.assertRaises(AttributeError):
     49             self.parser.returns_unicode
     50 
     51         # Issue #25019
     52         self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
     53         self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
     54         self.assertRaises(TypeError, getattr, self.parser, range(0xF))
     55 
     56 
     57 data = b'''\
     58 <?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
     59 <?xml-stylesheet href="stylesheet.css"?>
     60 <!-- comment data -->
     61 <!DOCTYPE quotations SYSTEM "quotations.dtd" [
     62 <!ELEMENT root ANY>
     63 <!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
     64 <!NOTATION notation SYSTEM "notation.jpeg">
     65 <!ENTITY acirc "&#226;">
     66 <!ENTITY external_entity SYSTEM "entity.file">
     67 <!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
     68 %unparsed_entity;
     69 ]>
     70 
     71 <root attr1="value1" attr2="value2&#8000;">
     72 <myns:subelement xmlns:myns="http://www.python.org/namespace">
     73      Contents of subelements
     74 </myns:subelement>
     75 <sub2><![CDATA[contents of CDATA section]]></sub2>
     76 &external_entity;
     77 &skipped_entity;
     78 \xb5
     79 </root>
     80 '''
     81 
     82 
     83 # Produce UTF-8 output
     84 class ParseTest(unittest.TestCase):
     85     class Outputter:
     86         def __init__(self):
     87             self.out = []
     88 
     89         def StartElementHandler(self, name, attrs):
     90             self.out.append('Start element: ' + repr(name) + ' ' +
     91                             sortdict(attrs))
     92 
     93         def EndElementHandler(self, name):
     94             self.out.append('End element: ' + repr(name))
     95 
     96         def CharacterDataHandler(self, data):
     97             data = data.strip()
     98             if data:
     99                 self.out.append('Character data: ' + repr(data))
    100 
    101         def ProcessingInstructionHandler(self, target, data):
    102             self.out.append('PI: ' + repr(target) + ' ' + repr(data))
    103 
    104         def StartNamespaceDeclHandler(self, prefix, uri):
    105             self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))
    106 
    107         def EndNamespaceDeclHandler(self, prefix):
    108             self.out.append('End of NS decl: ' + repr(prefix))
    109 
    110         def StartCdataSectionHandler(self):
    111             self.out.append('Start of CDATA section')
    112 
    113         def EndCdataSectionHandler(self):
    114             self.out.append('End of CDATA section')
    115 
    116         def CommentHandler(self, text):
    117             self.out.append('Comment: ' + repr(text))
    118 
    119         def NotationDeclHandler(self, *args):
    120             name, base, sysid, pubid = args
    121             self.out.append('Notation declared: %s' %(args,))
    122 
    123         def UnparsedEntityDeclHandler(self, *args):
    124             entityName, base, systemId, publicId, notationName = args
    125             self.out.append('Unparsed entity decl: %s' %(args,))
    126 
    127         def NotStandaloneHandler(self):
    128             self.out.append('Not standalone')
    129             return 1
    130 
    131         def ExternalEntityRefHandler(self, *args):
    132             context, base, sysId, pubId = args
    133             self.out.append('External entity ref: %s' %(args[1:],))
    134             return 1
    135 
    136         def StartDoctypeDeclHandler(self, *args):
    137             self.out.append(('Start doctype', args))
    138             return 1
    139 
    140         def EndDoctypeDeclHandler(self):
    141             self.out.append("End doctype")
    142             return 1
    143 
    144         def EntityDeclHandler(self, *args):
    145             self.out.append(('Entity declaration', args))
    146             return 1
    147 
    148         def XmlDeclHandler(self, *args):
    149             self.out.append(('XML declaration', args))
    150             return 1
    151 
    152         def ElementDeclHandler(self, *args):
    153             self.out.append(('Element declaration', args))
    154             return 1
    155 
    156         def AttlistDeclHandler(self, *args):
    157             self.out.append(('Attribute list declaration', args))
    158             return 1
    159 
    160         def SkippedEntityHandler(self, *args):
    161             self.out.append(("Skipped entity", args))
    162             return 1
    163 
    164         def DefaultHandler(self, userData):
    165             pass
    166 
    167         def DefaultHandlerExpand(self, userData):
    168             pass
    169 
    170     handler_names = [
    171         'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
    172         'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
    173         'NotationDeclHandler', 'StartNamespaceDeclHandler',
    174         'EndNamespaceDeclHandler', 'CommentHandler',
    175         'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
    176         'DefaultHandlerExpand', 'NotStandaloneHandler',
    177         'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
    178         'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
    179         'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
    180         ]
    181 
    182     def _hookup_callbacks(self, parser, handler):
    183         """
    184         Set each of the callbacks defined on handler and named in
    185         self.handler_names on the given parser.
    186         """
    187         for name in self.handler_names:
    188             setattr(parser, name, getattr(handler, name))
    189 
    190     def _verify_parse_output(self, operations):
    191         expected_operations = [
    192             ('XML declaration', ('1.0', 'iso-8859-1', 0)),
    193             'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
    194             "Comment: ' comment data '",
    195             "Not standalone",
    196             ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
    197             ('Element declaration', ('root', (2, 0, None, ()))),
    198             ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
    199                 1)),
    200             ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
    201                 0)),
    202             "Notation declared: ('notation', None, 'notation.jpeg', None)",
    203             ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
    204             ('Entity declaration', ('external_entity', 0, None, None,
    205                 'entity.file', None, None)),
    206             "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
    207             "Not standalone",
    208             "End doctype",
    209             "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
    210             "NS decl: 'myns' 'http://www.python.org/namespace'",
    211             "Start element: 'http://www.python.org/namespace!subelement' {}",
    212             "Character data: 'Contents of subelements'",
    213             "End element: 'http://www.python.org/namespace!subelement'",
    214             "End of NS decl: 'myns'",
    215             "Start element: 'sub2' {}",
    216             'Start of CDATA section',
    217             "Character data: 'contents of CDATA section'",
    218             'End of CDATA section',
    219             "End element: 'sub2'",
    220             "External entity ref: (None, 'entity.file', None)",
    221             ('Skipped entity', ('skipped_entity', 0)),
    222             "Character data: '\xb5'",
    223             "End element: 'root'",
    224         ]
    225         for operation, expected_operation in zip(operations, expected_operations):
    226             self.assertEqual(operation, expected_operation)
    227 
    228     def test_parse_bytes(self):
    229         out = self.Outputter()
    230         parser = expat.ParserCreate(namespace_separator='!')
    231         self._hookup_callbacks(parser, out)
    232 
    233         parser.Parse(data, 1)
    234 
    235         operations = out.out
    236         self._verify_parse_output(operations)
    237         # Issue #6697.
    238         self.assertRaises(AttributeError, getattr, parser, '\uD800')
    239 
    240     def test_parse_str(self):
    241         out = self.Outputter()
    242         parser = expat.ParserCreate(namespace_separator='!')
    243         self._hookup_callbacks(parser, out)
    244 
    245         parser.Parse(data.decode('iso-8859-1'), 1)
    246 
    247         operations = out.out
    248         self._verify_parse_output(operations)
    249 
    250     def test_parse_file(self):
    251         # Try parsing a file
    252         out = self.Outputter()
    253         parser = expat.ParserCreate(namespace_separator='!')
    254         self._hookup_callbacks(parser, out)
    255         file = BytesIO(data)
    256 
    257         parser.ParseFile(file)
    258 
    259         operations = out.out
    260         self._verify_parse_output(operations)
    261 
    262     def test_parse_again(self):
    263         parser = expat.ParserCreate()
    264         file = BytesIO(data)
    265         parser.ParseFile(file)
    266         # Issue 6676: ensure a meaningful exception is raised when attempting
    267         # to parse more than one XML document per xmlparser instance,
    268         # a limitation of the Expat library.
    269         with self.assertRaises(expat.error) as cm:
    270             parser.ParseFile(file)
    271         self.assertEqual(expat.ErrorString(cm.exception.code),
    272                           expat.errors.XML_ERROR_FINISHED)
    273 
    274 class NamespaceSeparatorTest(unittest.TestCase):
    275     def test_legal(self):
    276         # Tests that make sure we get errors when the namespace_separator value
    277         # is illegal, and that we don't for good values:
    278         expat.ParserCreate()
    279         expat.ParserCreate(namespace_separator=None)
    280         expat.ParserCreate(namespace_separator=' ')
    281 
    282     def test_illegal(self):
    283         try:
    284             expat.ParserCreate(namespace_separator=42)
    285             self.fail()
    286         except TypeError as e:
    287             self.assertEqual(str(e),
    288                 'ParserCreate() argument 2 must be str or None, not int')
    289 
    290         try:
    291             expat.ParserCreate(namespace_separator='too long')
    292             self.fail()
    293         except ValueError as e:
    294             self.assertEqual(str(e),
    295                 'namespace_separator must be at most one character, omitted, or None')
    296 
    297     def test_zero_length(self):
    298         # ParserCreate() needs to accept a namespace_separator of zero length
    299         # to satisfy the requirements of RDF applications that are required
    300         # to simply glue together the namespace URI and the localname.  Though
    301         # considered a wart of the RDF specifications, it needs to be supported.
    302         #
    303         # See XML-SIG mailing list thread starting with
    304         # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
    305         #
    306         expat.ParserCreate(namespace_separator='') # too short
    307 
    308 
    309 class InterningTest(unittest.TestCase):
    310     def test(self):
    311         # Test the interning machinery.
    312         p = expat.ParserCreate()
    313         L = []
    314         def collector(name, *args):
    315             L.append(name)
    316         p.StartElementHandler = collector
    317         p.EndElementHandler = collector
    318         p.Parse(b"<e> <e/> <e></e> </e>", 1)
    319         tag = L[0]
    320         self.assertEqual(len(L), 6)
    321         for entry in L:
    322             # L should have the same string repeated over and over.
    323             self.assertTrue(tag is entry)
    324 
    325     def test_issue9402(self):
    326         # create an ExternalEntityParserCreate with buffer text
    327         class ExternalOutputter:
    328             def __init__(self, parser):
    329                 self.parser = parser
    330                 self.parser_result = None
    331 
    332             def ExternalEntityRefHandler(self, context, base, sysId, pubId):
    333                 external_parser = self.parser.ExternalEntityParserCreate("")
    334                 self.parser_result = external_parser.Parse(b"", 1)
    335                 return 1
    336 
    337         parser = expat.ParserCreate(namespace_separator='!')
    338         parser.buffer_text = 1
    339         out = ExternalOutputter(parser)
    340         parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
    341         parser.Parse(data, 1)
    342         self.assertEqual(out.parser_result, 1)
    343 
    344 
    345 class BufferTextTest(unittest.TestCase):
    346     def setUp(self):
    347         self.stuff = []
    348         self.parser = expat.ParserCreate()
    349         self.parser.buffer_text = 1
    350         self.parser.CharacterDataHandler = self.CharacterDataHandler
    351 
    352     def check(self, expected, label):
    353         self.assertEqual(self.stuff, expected,
    354                 "%s\nstuff    = %r\nexpected = %r"
    355                 % (label, self.stuff, map(str, expected)))
    356 
    357     def CharacterDataHandler(self, text):
    358         self.stuff.append(text)
    359 
    360     def StartElementHandler(self, name, attrs):
    361         self.stuff.append("<%s>" % name)
    362         bt = attrs.get("buffer-text")
    363         if bt == "yes":
    364             self.parser.buffer_text = 1
    365         elif bt == "no":
    366             self.parser.buffer_text = 0
    367 
    368     def EndElementHandler(self, name):
    369         self.stuff.append("</%s>" % name)
    370 
    371     def CommentHandler(self, data):
    372         self.stuff.append("<!--%s-->" % data)
    373 
    374     def setHandlers(self, handlers=[]):
    375         for name in handlers:
    376             setattr(self.parser, name, getattr(self, name))
    377 
    378     def test_default_to_disabled(self):
    379         parser = expat.ParserCreate()
    380         self.assertFalse(parser.buffer_text)
    381 
    382     def test_buffering_enabled(self):
    383         # Make sure buffering is turned on
    384         self.assertTrue(self.parser.buffer_text)
    385         self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
    386         self.assertEqual(self.stuff, ['123'],
    387                          "buffered text not properly collapsed")
    388 
    389     def test1(self):
    390         # XXX This test exposes more detail of Expat's text chunking than we
    391         # XXX like, but it tests what we need to concisely.
    392         self.setHandlers(["StartElementHandler"])
    393         self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
    394         self.assertEqual(self.stuff,
    395                          ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
    396                          "buffering control not reacting as expected")
    397 
    398     def test2(self):
    399         self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
    400         self.assertEqual(self.stuff, ["1<2> \n 3"],
    401                          "buffered text not properly collapsed")
    402 
    403     def test3(self):
    404         self.setHandlers(["StartElementHandler"])
    405         self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
    406         self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
    407                          "buffered text not properly split")
    408 
    409     def test4(self):
    410         self.setHandlers(["StartElementHandler", "EndElementHandler"])
    411         self.parser.CharacterDataHandler = None
    412         self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
    413         self.assertEqual(self.stuff,
    414                          ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
    415 
    416     def test5(self):
    417         self.setHandlers(["StartElementHandler", "EndElementHandler"])
    418         self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
    419         self.assertEqual(self.stuff,
    420             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
    421 
    422     def test6(self):
    423         self.setHandlers(["CommentHandler", "EndElementHandler",
    424                     "StartElementHandler"])
    425         self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
    426         self.assertEqual(self.stuff,
    427             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
    428             "buffered text not properly split")
    429 
    430     def test7(self):
    431         self.setHandlers(["CommentHandler", "EndElementHandler",
    432                     "StartElementHandler"])
    433         self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
    434         self.assertEqual(self.stuff,
    435                          ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
    436                           "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
    437                          "buffered text not properly split")
    438 
    439 
    440 # Test handling of exception from callback:
    441 class HandlerExceptionTest(unittest.TestCase):
    442     def StartElementHandler(self, name, attrs):
    443         raise RuntimeError(name)
    444 
    445     def check_traceback_entry(self, entry, filename, funcname):
    446         self.assertEqual(os.path.basename(entry[0]), filename)
    447         self.assertEqual(entry[2], funcname)
    448 
    449     def test_exception(self):
    450         parser = expat.ParserCreate()
    451         parser.StartElementHandler = self.StartElementHandler
    452         try:
    453             parser.Parse(b"<a><b><c/></b></a>", 1)
    454             self.fail()
    455         except RuntimeError as e:
    456             self.assertEqual(e.args[0], 'a',
    457                              "Expected RuntimeError for element 'a', but" + \
    458                              " found %r" % e.args[0])
    459             # Check that the traceback contains the relevant line in pyexpat.c
    460             entries = traceback.extract_tb(e.__traceback__)
    461             self.assertEqual(len(entries), 3)
    462             self.check_traceback_entry(entries[0],
    463                                        "test_pyexpat.py", "test_exception")
    464             self.check_traceback_entry(entries[1],
    465                                        "pyexpat.c", "StartElement")
    466             self.check_traceback_entry(entries[2],
    467                                        "test_pyexpat.py", "StartElementHandler")
    468             if sysconfig.is_python_build():
    469                 self.assertIn('call_with_frame("StartElement"', entries[1][3])
    470 
    471 
    472 # Test Current* members:
    473 class PositionTest(unittest.TestCase):
    474     def StartElementHandler(self, name, attrs):
    475         self.check_pos('s')
    476 
    477     def EndElementHandler(self, name):
    478         self.check_pos('e')
    479 
    480     def check_pos(self, event):
    481         pos = (event,
    482                self.parser.CurrentByteIndex,
    483                self.parser.CurrentLineNumber,
    484                self.parser.CurrentColumnNumber)
    485         self.assertTrue(self.upto < len(self.expected_list),
    486                         'too many parser events')
    487         expected = self.expected_list[self.upto]
    488         self.assertEqual(pos, expected,
    489                 'Expected position %s, got position %s' %(pos, expected))
    490         self.upto += 1
    491 
    492     def test(self):
    493         self.parser = expat.ParserCreate()
    494         self.parser.StartElementHandler = self.StartElementHandler
    495         self.parser.EndElementHandler = self.EndElementHandler
    496         self.upto = 0
    497         self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
    498                               ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
    499 
    500         xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
    501         self.parser.Parse(xml, 1)
    502 
    503 
    504 class sf1296433Test(unittest.TestCase):
    505     def test_parse_only_xml_data(self):
    506         # http://python.org/sf/1296433
    507         #
    508         xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
    509         # this one doesn't crash
    510         #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
    511 
    512         class SpecificException(Exception):
    513             pass
    514 
    515         def handler(text):
    516             raise SpecificException
    517 
    518         parser = expat.ParserCreate()
    519         parser.CharacterDataHandler = handler
    520 
    521         self.assertRaises(Exception, parser.Parse, xml.encode('iso8859'))
    522 
    523 class ChardataBufferTest(unittest.TestCase):
    524     """
    525     test setting of chardata buffer size
    526     """
    527 
    528     def test_1025_bytes(self):
    529         self.assertEqual(self.small_buffer_test(1025), 2)
    530 
    531     def test_1000_bytes(self):
    532         self.assertEqual(self.small_buffer_test(1000), 1)
    533 
    534     def test_wrong_size(self):
    535         parser = expat.ParserCreate()
    536         parser.buffer_text = 1
    537         with self.assertRaises(ValueError):
    538             parser.buffer_size = -1
    539         with self.assertRaises(ValueError):
    540             parser.buffer_size = 0
    541         with self.assertRaises((ValueError, OverflowError)):
    542             parser.buffer_size = sys.maxsize + 1
    543         with self.assertRaises(TypeError):
    544             parser.buffer_size = 512.0
    545 
    546     def test_unchanged_size(self):
    547         xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
    548         xml2 = b'a'*512 + b'</s>'
    549         parser = expat.ParserCreate()
    550         parser.CharacterDataHandler = self.counting_handler
    551         parser.buffer_size = 512
    552         parser.buffer_text = 1
    553 
    554         # Feed 512 bytes of character data: the handler should be called
    555         # once.
    556         self.n = 0
    557         parser.Parse(xml1)
    558         self.assertEqual(self.n, 1)
    559 
    560         # Reassign to buffer_size, but assign the same size.
    561         parser.buffer_size = parser.buffer_size
    562         self.assertEqual(self.n, 1)
    563 
    564         # Try parsing rest of the document
    565         parser.Parse(xml2)
    566         self.assertEqual(self.n, 2)
    567 
    568 
    569     def test_disabling_buffer(self):
    570         xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
    571         xml2 = b'b' * 1024
    572         xml3 = b'c' * 1024 + b'</a>';
    573         parser = expat.ParserCreate()
    574         parser.CharacterDataHandler = self.counting_handler
    575         parser.buffer_text = 1
    576         parser.buffer_size = 1024
    577         self.assertEqual(parser.buffer_size, 1024)
    578 
    579         # Parse one chunk of XML
    580         self.n = 0
    581         parser.Parse(xml1, 0)
    582         self.assertEqual(parser.buffer_size, 1024)
    583         self.assertEqual(self.n, 1)
    584 
    585         # Turn off buffering and parse the next chunk.
    586         parser.buffer_text = 0
    587         self.assertFalse(parser.buffer_text)
    588         self.assertEqual(parser.buffer_size, 1024)
    589         for i in range(10):
    590             parser.Parse(xml2, 0)
    591         self.assertEqual(self.n, 11)
    592 
    593         parser.buffer_text = 1
    594         self.assertTrue(parser.buffer_text)
    595         self.assertEqual(parser.buffer_size, 1024)
    596         parser.Parse(xml3, 1)
    597         self.assertEqual(self.n, 12)
    598 
    599     def counting_handler(self, text):
    600         self.n += 1
    601 
    602     def small_buffer_test(self, buffer_len):
    603         xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
    604         parser = expat.ParserCreate()
    605         parser.CharacterDataHandler = self.counting_handler
    606         parser.buffer_size = 1024
    607         parser.buffer_text = 1
    608 
    609         self.n = 0
    610         parser.Parse(xml)
    611         return self.n
    612 
    613     def test_change_size_1(self):
    614         xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
    615         xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
    616         parser = expat.ParserCreate()
    617         parser.CharacterDataHandler = self.counting_handler
    618         parser.buffer_text = 1
    619         parser.buffer_size = 1024
    620         self.assertEqual(parser.buffer_size, 1024)
    621 
    622         self.n = 0
    623         parser.Parse(xml1, 0)
    624         parser.buffer_size *= 2
    625         self.assertEqual(parser.buffer_size, 2048)
    626         parser.Parse(xml2, 1)
    627         self.assertEqual(self.n, 2)
    628 
    629     def test_change_size_2(self):
    630         xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
    631         xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
    632         parser = expat.ParserCreate()
    633         parser.CharacterDataHandler = self.counting_handler
    634         parser.buffer_text = 1
    635         parser.buffer_size = 2048
    636         self.assertEqual(parser.buffer_size, 2048)
    637 
    638         self.n=0
    639         parser.Parse(xml1, 0)
    640         parser.buffer_size = parser.buffer_size // 2
    641         self.assertEqual(parser.buffer_size, 1024)
    642         parser.Parse(xml2, 1)
    643         self.assertEqual(self.n, 4)
    644 
    645 class MalformedInputTest(unittest.TestCase):
    646     def test1(self):
    647         xml = b"\0\r\n"
    648         parser = expat.ParserCreate()
    649         try:
    650             parser.Parse(xml, True)
    651             self.fail()
    652         except expat.ExpatError as e:
    653             self.assertEqual(str(e), 'unclosed token: line 2, column 0')
    654 
    655     def test2(self):
    656         # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
    657         xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
    658         parser = expat.ParserCreate()
    659         err_pattern = r'XML declaration not well-formed: line 1, column \d+'
    660         with self.assertRaisesRegex(expat.ExpatError, err_pattern):
    661             parser.Parse(xml, True)
    662 
    663 class ErrorMessageTest(unittest.TestCase):
    664     def test_codes(self):
    665         # verify mapping of errors.codes and errors.messages
    666         self.assertEqual(errors.XML_ERROR_SYNTAX,
    667                          errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])
    668 
    669     def test_expaterror(self):
    670         xml = b'<'
    671         parser = expat.ParserCreate()
    672         try:
    673             parser.Parse(xml, True)
    674             self.fail()
    675         except expat.ExpatError as e:
    676             self.assertEqual(e.code,
    677                              errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
    678 
    679 
    680 class ForeignDTDTests(unittest.TestCase):
    681     """
    682     Tests for the UseForeignDTD method of expat parser objects.
    683     """
    684     def test_use_foreign_dtd(self):
    685         """
    686         If UseForeignDTD is passed True and a document without an external
    687         entity reference is parsed, ExternalEntityRefHandler is first called
    688         with None for the public and system ids.
    689         """
    690         handler_call_args = []
    691         def resolve_entity(context, base, system_id, public_id):
    692             handler_call_args.append((public_id, system_id))
    693             return 1
    694 
    695         parser = expat.ParserCreate()
    696         parser.UseForeignDTD(True)
    697         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    698         parser.ExternalEntityRefHandler = resolve_entity
    699         parser.Parse(b"<?xml version='1.0'?><element/>")
    700         self.assertEqual(handler_call_args, [(None, None)])
    701 
    702         # test UseForeignDTD() is equal to UseForeignDTD(True)
    703         handler_call_args[:] = []
    704 
    705         parser = expat.ParserCreate()
    706         parser.UseForeignDTD()
    707         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    708         parser.ExternalEntityRefHandler = resolve_entity
    709         parser.Parse(b"<?xml version='1.0'?><element/>")
    710         self.assertEqual(handler_call_args, [(None, None)])
    711 
    712     def test_ignore_use_foreign_dtd(self):
    713         """
    714         If UseForeignDTD is passed True and a document with an external
    715         entity reference is parsed, ExternalEntityRefHandler is called with
    716         the public and system ids from the document.
    717         """
    718         handler_call_args = []
    719         def resolve_entity(context, base, system_id, public_id):
    720             handler_call_args.append((public_id, system_id))
    721             return 1
    722 
    723         parser = expat.ParserCreate()
    724         parser.UseForeignDTD(True)
    725         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    726         parser.ExternalEntityRefHandler = resolve_entity
    727         parser.Parse(
    728             b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
    729         self.assertEqual(handler_call_args, [("bar", "baz")])
    730 
    731 
    732 if __name__ == "__main__":
    733     unittest.main()
    734