Home | History | Annotate | Download | only in test
      1 # XXX TypeErrors on calling handlers, or on bad return values from a
      2 # handler, are obscure and unhelpful.
      3 
      4 import StringIO, sys
      5 import unittest
      6 
      7 from xml.parsers import expat
      8 
      9 from test import test_support
     10 from test.test_support import sortdict, run_unittest
     11 
     12 
     13 class SetAttributeTest(unittest.TestCase):
     14     def setUp(self):
     15         self.parser = expat.ParserCreate(namespace_separator='!')
     16 
     17     def test_buffer_text(self):
     18         self.assertIs(self.parser.buffer_text, False)
     19         for x in 0, 1, 2, 0:
     20             self.parser.buffer_text = x
     21             self.assertIs(self.parser.buffer_text, bool(x))
     22 
     23     def test_namespace_prefixes(self):
     24         self.assertIs(self.parser.namespace_prefixes, False)
     25         for x in 0, 1, 2, 0:
     26             self.parser.namespace_prefixes = x
     27             self.assertIs(self.parser.namespace_prefixes, bool(x))
     28 
     29     def test_returns_unicode(self):
     30         self.assertIs(self.parser.returns_unicode, test_support.have_unicode)
     31         for x in 0, 1, 2, 0:
     32             self.parser.returns_unicode = x
     33             self.assertIs(self.parser.returns_unicode, bool(x))
     34 
     35     def test_ordered_attributes(self):
     36         self.assertIs(self.parser.ordered_attributes, False)
     37         for x in 0, 1, 2, 0:
     38             self.parser.ordered_attributes = x
     39             self.assertIs(self.parser.ordered_attributes, bool(x))
     40 
     41     def test_specified_attributes(self):
     42         self.assertIs(self.parser.specified_attributes, False)
     43         for x in 0, 1, 2, 0:
     44             self.parser.specified_attributes = x
     45             self.assertIs(self.parser.specified_attributes, bool(x))
     46 
     47     def test_invalid_attributes(self):
     48         with self.assertRaises(AttributeError):
     49             self.parser.foo = 1
     50         with self.assertRaises(AttributeError):
     51             self.parser.foo
     52 
     53 
# Sample XML document parsed by the ParseTest cases.  It contains a
# processing instruction, a comment, a DOCTYPE with element, notation and
# entity declarations, a namespaced element, a CDATA section and an
# external entity reference.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
     75 
     76 
     77 # Produce UTF-8 output
     78 class ParseTest(unittest.TestCase):
     79     class Outputter:
     80         def __init__(self):
     81             self.out = []
     82 
     83         def StartElementHandler(self, name, attrs):
     84             self.out.append('Start element: ' + repr(name) + ' ' +
     85                             sortdict(attrs))
     86 
     87         def EndElementHandler(self, name):
     88             self.out.append('End element: ' + repr(name))
     89 
     90         def CharacterDataHandler(self, data):
     91             data = data.strip()
     92             if data:
     93                 self.out.append('Character data: ' + repr(data))
     94 
     95         def ProcessingInstructionHandler(self, target, data):
     96             self.out.append('PI: ' + repr(target) + ' ' + repr(data))
     97 
     98         def StartNamespaceDeclHandler(self, prefix, uri):
     99             self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))
    100 
    101         def EndNamespaceDeclHandler(self, prefix):
    102             self.out.append('End of NS decl: ' + repr(prefix))
    103 
    104         def StartCdataSectionHandler(self):
    105             self.out.append('Start of CDATA section')
    106 
    107         def EndCdataSectionHandler(self):
    108             self.out.append('End of CDATA section')
    109 
    110         def CommentHandler(self, text):
    111             self.out.append('Comment: ' + repr(text))
    112 
    113         def NotationDeclHandler(self, *args):
    114             name, base, sysid, pubid = args
    115             self.out.append('Notation declared: %s' %(args,))
    116 
    117         def UnparsedEntityDeclHandler(self, *args):
    118             entityName, base, systemId, publicId, notationName = args
    119             self.out.append('Unparsed entity decl: %s' %(args,))
    120 
    121         def NotStandaloneHandler(self, userData):
    122             self.out.append('Not standalone')
    123             return 1
    124 
    125         def ExternalEntityRefHandler(self, *args):
    126             context, base, sysId, pubId = args
    127             self.out.append('External entity ref: %s' %(args[1:],))
    128             return 1
    129 
    130         def DefaultHandler(self, userData):
    131             pass
    132 
    133         def DefaultHandlerExpand(self, userData):
    134             pass
    135 
    136     handler_names = [
    137         'StartElementHandler', 'EndElementHandler',
    138         'CharacterDataHandler', 'ProcessingInstructionHandler',
    139         'UnparsedEntityDeclHandler', 'NotationDeclHandler',
    140         'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
    141         'CommentHandler', 'StartCdataSectionHandler',
    142         'EndCdataSectionHandler',
    143         'DefaultHandler', 'DefaultHandlerExpand',
    144         #'NotStandaloneHandler',
    145         'ExternalEntityRefHandler'
    146         ]
    147 
    148     def test_utf8(self):
    149 
    150         out = self.Outputter()
    151         parser = expat.ParserCreate(namespace_separator='!')
    152         for name in self.handler_names:
    153             setattr(parser, name, getattr(out, name))
    154         parser.returns_unicode = 0
    155         parser.Parse(data, 1)
    156 
    157         # Verify output
    158         op = out.out
    159         self.assertEqual(op[0], 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'')
    160         self.assertEqual(op[1], "Comment: ' comment data '")
    161         self.assertEqual(op[2], "Notation declared: ('notation', None, 'notation.jpeg', None)")
    162         self.assertEqual(op[3], "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')")
    163         self.assertEqual(op[4], "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}")
    164         self.assertEqual(op[5], "NS decl: 'myns' 'http://www.python.org/namespace'")
    165         self.assertEqual(op[6], "Start element: 'http://www.python.org/namespace!subelement' {}")
    166         self.assertEqual(op[7], "Character data: 'Contents of subelements'")
    167         self.assertEqual(op[8], "End element: 'http://www.python.org/namespace!subelement'")
    168         self.assertEqual(op[9], "End of NS decl: 'myns'")
    169         self.assertEqual(op[10], "Start element: 'sub2' {}")
    170         self.assertEqual(op[11], 'Start of CDATA section')
    171         self.assertEqual(op[12], "Character data: 'contents of CDATA section'")
    172         self.assertEqual(op[13], 'End of CDATA section')
    173         self.assertEqual(op[14], "End element: 'sub2'")
    174         self.assertEqual(op[15], "External entity ref: (None, 'entity.file', None)")
    175         self.assertEqual(op[16], "End element: 'root'")
    176 
    177     def test_unicode(self):
    178         # Try the parse again, this time producing Unicode output
    179         out = self.Outputter()
    180         parser = expat.ParserCreate(namespace_separator='!')
    181         parser.returns_unicode = 1
    182         for name in self.handler_names:
    183             setattr(parser, name, getattr(out, name))
    184 
    185         parser.Parse(data, 1)
    186 
    187         op = out.out
    188         self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
    189         self.assertEqual(op[1], "Comment: u' comment data '")
    190         self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
    191         self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
    192         self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
    193         self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
    194         self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
    195         self.assertEqual(op[7], "Character data: u'Contents of subelements'")
    196         self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'")
    197         self.assertEqual(op[9], "End of NS decl: u'myns'")
    198         self.assertEqual(op[10], "Start element: u'sub2' {}")
    199         self.assertEqual(op[11], 'Start of CDATA section')
    200         self.assertEqual(op[12], "Character data: u'contents of CDATA section'")
    201         self.assertEqual(op[13], 'End of CDATA section')
    202         self.assertEqual(op[14], "End element: u'sub2'")
    203         self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)")
    204         self.assertEqual(op[16], "End element: u'root'")
    205 
    206     def test_parse_file(self):
    207         # Try parsing a file
    208         out = self.Outputter()
    209         parser = expat.ParserCreate(namespace_separator='!')
    210         parser.returns_unicode = 1
    211         for name in self.handler_names:
    212             setattr(parser, name, getattr(out, name))
    213         file = StringIO.StringIO(data)
    214 
    215         parser.ParseFile(file)
    216 
    217         op = out.out
    218         self.assertEqual(op[0], 'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'')
    219         self.assertEqual(op[1], "Comment: u' comment data '")
    220         self.assertEqual(op[2], "Notation declared: (u'notation', None, u'notation.jpeg', None)")
    221         self.assertEqual(op[3], "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')")
    222         self.assertEqual(op[4], "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}")
    223         self.assertEqual(op[5], "NS decl: u'myns' u'http://www.python.org/namespace'")
    224         self.assertEqual(op[6], "Start element: u'http://www.python.org/namespace!subelement' {}")
    225         self.assertEqual(op[7], "Character data: u'Contents of subelements'")
    226         self.assertEqual(op[8], "End element: u'http://www.python.org/namespace!subelement'")
    227         self.assertEqual(op[9], "End of NS decl: u'myns'")
    228         self.assertEqual(op[10], "Start element: u'sub2' {}")
    229         self.assertEqual(op[11], 'Start of CDATA section')
    230         self.assertEqual(op[12], "Character data: u'contents of CDATA section'")
    231         self.assertEqual(op[13], 'End of CDATA section')
    232         self.assertEqual(op[14], "End element: u'sub2'")
    233         self.assertEqual(op[15], "External entity ref: (None, u'entity.file', None)")
    234         self.assertEqual(op[16], "End element: u'root'")
    235 
    236         # Issue 4877: expat.ParseFile causes segfault on a closed file.
    237         fp = open(test_support.TESTFN, 'wb')
    238         try:
    239             fp.close()
    240             parser = expat.ParserCreate()
    241             with self.assertRaises(ValueError):
    242                 parser.ParseFile(fp)
    243         finally:
    244             test_support.unlink(test_support.TESTFN)
    245 
    246     def test_parse_again(self):
    247         parser = expat.ParserCreate()
    248         file = StringIO.StringIO(data)
    249         parser.ParseFile(file)
    250         # Issue 6676: ensure a meaningful exception is raised when attempting
    251         # to parse more than one XML document per xmlparser instance,
    252         # a limitation of the Expat library.
    253         with self.assertRaises(expat.error) as cm:
    254             parser.ParseFile(file)
    255         self.assertEqual(expat.ErrorString(cm.exception.code),
    256                           expat.errors.XML_ERROR_FINISHED)
    257 
    258 class NamespaceSeparatorTest(unittest.TestCase):
    259     def test_legal(self):
    260         # Tests that make sure we get errors when the namespace_separator value
    261         # is illegal, and that we don't for good values:
    262         expat.ParserCreate()
    263         expat.ParserCreate(namespace_separator=None)
    264         expat.ParserCreate(namespace_separator=' ')
    265 
    266     def test_illegal(self):
    267         try:
    268             expat.ParserCreate(namespace_separator=42)
    269             self.fail()
    270         except TypeError, e:
    271             self.assertEqual(str(e),
    272                 'ParserCreate() argument 2 must be string or None, not int')
    273 
    274         try:
    275             expat.ParserCreate(namespace_separator='too long')
    276             self.fail()
    277         except ValueError, e:
    278             self.assertEqual(str(e),
    279                 'namespace_separator must be at most one character, omitted, or None')
    280 
    281     def test_zero_length(self):
    282         # ParserCreate() needs to accept a namespace_separator of zero length
    283         # to satisfy the requirements of RDF applications that are required
    284         # to simply glue together the namespace URI and the localname.  Though
    285         # considered a wart of the RDF specifications, it needs to be supported.
    286         #
    287         # See XML-SIG mailing list thread starting with
    288         # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
    289         #
    290         expat.ParserCreate(namespace_separator='') # too short
    291 
    292 
    293 class InterningTest(unittest.TestCase):
    294     def test(self):
    295         # Test the interning machinery.
    296         p = expat.ParserCreate()
    297         L = []
    298         def collector(name, *args):
    299             L.append(name)
    300         p.StartElementHandler = collector
    301         p.EndElementHandler = collector
    302         p.Parse("<e> <e/> <e></e> </e>", 1)
    303         tag = L[0]
    304         self.assertEqual(len(L), 6)
    305         for entry in L:
    306             # L should have the same string repeated over and over.
    307             self.assertTrue(tag is entry)
    308 
    309 
    310 class BufferTextTest(unittest.TestCase):
    311     def setUp(self):
    312         self.stuff = []
    313         self.parser = expat.ParserCreate()
    314         self.parser.buffer_text = 1
    315         self.parser.CharacterDataHandler = self.CharacterDataHandler
    316 
    317     def check(self, expected, label):
    318         self.assertEqual(self.stuff, expected,
    319                 "%s\nstuff    = %r\nexpected = %r"
    320                 % (label, self.stuff, map(unicode, expected)))
    321 
    322     def CharacterDataHandler(self, text):
    323         self.stuff.append(text)
    324 
    325     def StartElementHandler(self, name, attrs):
    326         self.stuff.append("<%s>" % name)
    327         bt = attrs.get("buffer-text")
    328         if bt == "yes":
    329             self.parser.buffer_text = 1
    330         elif bt == "no":
    331             self.parser.buffer_text = 0
    332 
    333     def EndElementHandler(self, name):
    334         self.stuff.append("</%s>" % name)
    335 
    336     def CommentHandler(self, data):
    337         self.stuff.append("<!--%s-->" % data)
    338 
    339     def setHandlers(self, handlers=[]):
    340         for name in handlers:
    341             setattr(self.parser, name, getattr(self, name))
    342 
    343     def test_default_to_disabled(self):
    344         parser = expat.ParserCreate()
    345         self.assertFalse(parser.buffer_text)
    346 
    347     def test_buffering_enabled(self):
    348         # Make sure buffering is turned on
    349         self.assertTrue(self.parser.buffer_text)
    350         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
    351         self.assertEqual(self.stuff, ['123'],
    352                          "buffered text not properly collapsed")
    353 
    354     def test1(self):
    355         # XXX This test exposes more detail of Expat's text chunking than we
    356         # XXX like, but it tests what we need to concisely.
    357         self.setHandlers(["StartElementHandler"])
    358         self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
    359         self.assertEqual(self.stuff,
    360                          ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
    361                          "buffering control not reacting as expected")
    362 
    363     def test2(self):
    364         self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
    365         self.assertEqual(self.stuff, ["1<2> \n 3"],
    366                          "buffered text not properly collapsed")
    367 
    368     def test3(self):
    369         self.setHandlers(["StartElementHandler"])
    370         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
    371         self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
    372                           "buffered text not properly split")
    373 
    374     def test4(self):
    375         self.setHandlers(["StartElementHandler", "EndElementHandler"])
    376         self.parser.CharacterDataHandler = None
    377         self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
    378         self.assertEqual(self.stuff,
    379                          ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
    380 
    381     def test5(self):
    382         self.setHandlers(["StartElementHandler", "EndElementHandler"])
    383         self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
    384         self.assertEqual(self.stuff,
    385             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
    386 
    387     def test6(self):
    388         self.setHandlers(["CommentHandler", "EndElementHandler",
    389                     "StartElementHandler"])
    390         self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
    391         self.assertEqual(self.stuff,
    392             ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
    393             "buffered text not properly split")
    394 
    395     def test7(self):
    396         self.setHandlers(["CommentHandler", "EndElementHandler",
    397                     "StartElementHandler"])
    398         self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
    399         self.assertEqual(self.stuff,
    400                          ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
    401                           "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
    402                          "buffered text not properly split")
    403 
    404 
    405 # Test handling of exception from callback:
    406 class HandlerExceptionTest(unittest.TestCase):
    407     def StartElementHandler(self, name, attrs):
    408         raise RuntimeError(name)
    409 
    410     def test(self):
    411         parser = expat.ParserCreate()
    412         parser.StartElementHandler = self.StartElementHandler
    413         try:
    414             parser.Parse("<a><b><c/></b></a>", 1)
    415             self.fail()
    416         except RuntimeError, e:
    417             self.assertEqual(e.args[0], 'a',
    418                              "Expected RuntimeError for element 'a', but" + \
    419                              " found %r" % e.args[0])
    420 
    421 
    422 # Test Current* members:
    423 class PositionTest(unittest.TestCase):
    424     def StartElementHandler(self, name, attrs):
    425         self.check_pos('s')
    426 
    427     def EndElementHandler(self, name):
    428         self.check_pos('e')
    429 
    430     def check_pos(self, event):
    431         pos = (event,
    432                self.parser.CurrentByteIndex,
    433                self.parser.CurrentLineNumber,
    434                self.parser.CurrentColumnNumber)
    435         self.assertTrue(self.upto < len(self.expected_list),
    436                         'too many parser events')
    437         expected = self.expected_list[self.upto]
    438         self.assertEqual(pos, expected,
    439                 'Expected position %s, got position %s' %(pos, expected))
    440         self.upto += 1
    441 
    442     def test(self):
    443         self.parser = expat.ParserCreate()
    444         self.parser.StartElementHandler = self.StartElementHandler
    445         self.parser.EndElementHandler = self.EndElementHandler
    446         self.upto = 0
    447         self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
    448                               ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]
    449 
    450         xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
    451         self.parser.Parse(xml, 1)
    452 
    453 
    454 class sf1296433Test(unittest.TestCase):
    455     def test_parse_only_xml_data(self):
    456         # http://python.org/sf/1296433
    457         #
    458         xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
    459         # this one doesn't crash
    460         #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)
    461 
    462         class SpecificException(Exception):
    463             pass
    464 
    465         def handler(text):
    466             raise SpecificException
    467 
    468         parser = expat.ParserCreate()
    469         parser.CharacterDataHandler = handler
    470 
    471         self.assertRaises(Exception, parser.Parse, xml)
    472 
    473 class ChardataBufferTest(unittest.TestCase):
    474     """
    475     test setting of chardata buffer size
    476     """
    477 
    478     def test_1025_bytes(self):
    479         self.assertEqual(self.small_buffer_test(1025), 2)
    480 
    481     def test_1000_bytes(self):
    482         self.assertEqual(self.small_buffer_test(1000), 1)
    483 
    484     def test_wrong_size(self):
    485         parser = expat.ParserCreate()
    486         parser.buffer_text = 1
    487         with self.assertRaises(ValueError):
    488             parser.buffer_size = -1
    489         with self.assertRaises(ValueError):
    490             parser.buffer_size = 0
    491         with self.assertRaises(TypeError):
    492             parser.buffer_size = 512.0
    493         with self.assertRaises(TypeError):
    494             parser.buffer_size = sys.maxint+1
    495 
    496     def test_unchanged_size(self):
    497         xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
    498         xml2 = 'a'*512 + '</s>'
    499         parser = expat.ParserCreate()
    500         parser.CharacterDataHandler = self.counting_handler
    501         parser.buffer_size = 512
    502         parser.buffer_text = 1
    503 
    504         # Feed 512 bytes of character data: the handler should be called
    505         # once.
    506         self.n = 0
    507         parser.Parse(xml1)
    508         self.assertEqual(self.n, 1)
    509 
    510         # Reassign to buffer_size, but assign the same size.
    511         parser.buffer_size = parser.buffer_size
    512         self.assertEqual(self.n, 1)
    513 
    514         # Try parsing rest of the document
    515         parser.Parse(xml2)
    516         self.assertEqual(self.n, 2)
    517 
    518 
    519     def test_disabling_buffer(self):
    520         xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
    521         xml2 = ('b' * 1024)
    522         xml3 = "%s</a>" % ('c' * 1024)
    523         parser = expat.ParserCreate()
    524         parser.CharacterDataHandler = self.counting_handler
    525         parser.buffer_text = 1
    526         parser.buffer_size = 1024
    527         self.assertEqual(parser.buffer_size, 1024)
    528 
    529         # Parse one chunk of XML
    530         self.n = 0
    531         parser.Parse(xml1, 0)
    532         self.assertEqual(parser.buffer_size, 1024)
    533         self.assertEqual(self.n, 1)
    534 
    535         # Turn off buffering and parse the next chunk.
    536         parser.buffer_text = 0
    537         self.assertFalse(parser.buffer_text)
    538         self.assertEqual(parser.buffer_size, 1024)
    539         for i in range(10):
    540             parser.Parse(xml2, 0)
    541         self.assertEqual(self.n, 11)
    542 
    543         parser.buffer_text = 1
    544         self.assertTrue(parser.buffer_text)
    545         self.assertEqual(parser.buffer_size, 1024)
    546         parser.Parse(xml3, 1)
    547         self.assertEqual(self.n, 12)
    548 
    549 
    550 
    551     def make_document(self, bytes):
    552         return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
    553 
    554     def counting_handler(self, text):
    555         self.n += 1
    556 
    557     def small_buffer_test(self, buffer_len):
    558         xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
    559         parser = expat.ParserCreate()
    560         parser.CharacterDataHandler = self.counting_handler
    561         parser.buffer_size = 1024
    562         parser.buffer_text = 1
    563 
    564         self.n = 0
    565         parser.Parse(xml)
    566         return self.n
    567 
    568     def test_change_size_1(self):
    569         xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
    570         xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
    571         parser = expat.ParserCreate()
    572         parser.CharacterDataHandler = self.counting_handler
    573         parser.buffer_text = 1
    574         parser.buffer_size = 1024
    575         self.assertEqual(parser.buffer_size, 1024)
    576 
    577         self.n = 0
    578         parser.Parse(xml1, 0)
    579         parser.buffer_size *= 2
    580         self.assertEqual(parser.buffer_size, 2048)
    581         parser.Parse(xml2, 1)
    582         self.assertEqual(self.n, 2)
    583 
    584     def test_change_size_2(self):
    585         xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
    586         xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
    587         parser = expat.ParserCreate()
    588         parser.CharacterDataHandler = self.counting_handler
    589         parser.buffer_text = 1
    590         parser.buffer_size = 2048
    591         self.assertEqual(parser.buffer_size, 2048)
    592 
    593         self.n=0
    594         parser.Parse(xml1, 0)
    595         parser.buffer_size //= 2
    596         self.assertEqual(parser.buffer_size, 1024)
    597         parser.Parse(xml2, 1)
    598         self.assertEqual(self.n, 4)
    599 
    600 class MalformedInputText(unittest.TestCase):
    601     def test1(self):
    602         xml = "\0\r\n"
    603         parser = expat.ParserCreate()
    604         try:
    605             parser.Parse(xml, True)
    606             self.fail()
    607         except expat.ExpatError as e:
    608             self.assertEqual(str(e), 'unclosed token: line 2, column 0')
    609 
    610     def test2(self):
    611         xml = "<?xml version\xc2\x85='1.0'?>\r\n"
    612         parser = expat.ParserCreate()
    613         err_pattern = r'XML declaration not well-formed: line 1, column \d+'
    614         with self.assertRaisesRegexp(expat.ExpatError, err_pattern):
    615             parser.Parse(xml, True)
    616 
    617 class ForeignDTDTests(unittest.TestCase):
    618     """
    619     Tests for the UseForeignDTD method of expat parser objects.
    620     """
    621     def test_use_foreign_dtd(self):
    622         """
    623         If UseForeignDTD is passed True and a document without an external
    624         entity reference is parsed, ExternalEntityRefHandler is first called
    625         with None for the public and system ids.
    626         """
    627         handler_call_args = []
    628         def resolve_entity(context, base, system_id, public_id):
    629             handler_call_args.append((public_id, system_id))
    630             return 1
    631 
    632         parser = expat.ParserCreate()
    633         parser.UseForeignDTD(True)
    634         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    635         parser.ExternalEntityRefHandler = resolve_entity
    636         parser.Parse("<?xml version='1.0'?><element/>")
    637         self.assertEqual(handler_call_args, [(None, None)])
    638 
    639         # test UseForeignDTD() is equal to UseForeignDTD(True)
    640         handler_call_args[:] = []
    641 
    642         parser = expat.ParserCreate()
    643         parser.UseForeignDTD()
    644         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    645         parser.ExternalEntityRefHandler = resolve_entity
    646         parser.Parse("<?xml version='1.0'?><element/>")
    647         self.assertEqual(handler_call_args, [(None, None)])
    648 
    649     def test_ignore_use_foreign_dtd(self):
    650         """
    651         If UseForeignDTD is passed True and a document with an external
    652         entity reference is parsed, ExternalEntityRefHandler is called with
    653         the public and system ids from the document.
    654         """
    655         handler_call_args = []
    656         def resolve_entity(context, base, system_id, public_id):
    657             handler_call_args.append((public_id, system_id))
    658             return 1
    659 
    660         parser = expat.ParserCreate()
    661         parser.UseForeignDTD(True)
    662         parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
    663         parser.ExternalEntityRefHandler = resolve_entity
    664         parser.Parse(
    665             "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
    666         self.assertEqual(handler_call_args, [("bar", "baz")])
    667 
    668 
    669 def test_main():
    670     run_unittest(SetAttributeTest,
    671                  ParseTest,
    672                  NamespaceSeparatorTest,
    673                  InterningTest,
    674                  BufferTextTest,
    675                  HandlerExceptionTest,
    676                  PositionTest,
    677                  sf1296433Test,
    678                  ChardataBufferTest,
    679                  MalformedInputText,
    680                  ForeignDTDTests)
    681 
    682 if __name__ == "__main__":
    683     test_main()
    684