Home | History | Annotate | Download | only in test
      1 import io
      2 import unittest
      3 import xml.sax
      4 
      5 from xml.sax.xmlreader import AttributesImpl
      6 from xml.dom import pulldom
      7 
      8 from test.support import findfile
      9 
     10 
# Location of the "test.xml" fixture, looked up with test.support.findfile in
# the "xmltestdata" subdirectory.  PullDOMTestCase.test_parse parses it both
# by filename and through an open binary file object.
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, a comment,
# and a self-closing tag.  The tests below walk its event stream one event at
# a time, so any change to this text changes the expected event sequence:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
     22 
     23 
     24 class PullDOMTestCase(unittest.TestCase):
     25 
     26     def test_parse(self):
     27         """Minimal test of DOMEventStream.parse()"""
     28 
     29         # This just tests that parsing from a stream works. Actual parser
     30         # semantics are tested using parseString with a more focused XML
     31         # fragment.
     32 
     33         # Test with a filename:
     34         handler = pulldom.parse(tstfile)
     35         self.addCleanup(handler.stream.close)
     36         list(handler)
     37 
     38         # Test with a file object:
     39         with open(tstfile, "rb") as fin:
     40             list(pulldom.parse(fin))
     41 
     42     def test_parse_semantics(self):
     43         """Test DOMEventStream parsing semantics."""
     44 
     45         items = pulldom.parseString(SMALL_SAMPLE)
     46         evt, node = next(items)
     47         # Just check the node is a Document:
     48         self.assertTrue(hasattr(node, "createElement"))
     49         self.assertEqual(pulldom.START_DOCUMENT, evt)
     50         evt, node = next(items)
     51         self.assertEqual(pulldom.START_ELEMENT, evt)
     52         self.assertEqual("html", node.tagName)
     53         self.assertEqual(2, len(node.attributes))
     54         self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
     55               "http://www.xml.com/books")
     56         evt, node = next(items)
     57         self.assertEqual(pulldom.CHARACTERS, evt) # Line break
     58         evt, node = next(items)
     59         # XXX - A comment should be reported here!
     60         # self.assertEqual(pulldom.COMMENT, evt)
     61         # Line break after swallowed comment:
     62         self.assertEqual(pulldom.CHARACTERS, evt)
     63         evt, node = next(items)
     64         self.assertEqual("title", node.tagName)
     65         title_node = node
     66         evt, node = next(items)
     67         self.assertEqual(pulldom.CHARACTERS, evt)
     68         self.assertEqual("Introduction to XSL", node.data)
     69         evt, node = next(items)
     70         self.assertEqual(pulldom.END_ELEMENT, evt)
     71         self.assertEqual("title", node.tagName)
     72         self.assertTrue(title_node is node)
     73         evt, node = next(items)
     74         self.assertEqual(pulldom.CHARACTERS, evt)
     75         evt, node = next(items)
     76         self.assertEqual(pulldom.START_ELEMENT, evt)
     77         self.assertEqual("hr", node.tagName)
     78         evt, node = next(items)
     79         self.assertEqual(pulldom.END_ELEMENT, evt)
     80         self.assertEqual("hr", node.tagName)
     81         evt, node = next(items)
     82         self.assertEqual(pulldom.CHARACTERS, evt)
     83         evt, node = next(items)
     84         self.assertEqual(pulldom.START_ELEMENT, evt)
     85         self.assertEqual("p", node.tagName)
     86         evt, node = next(items)
     87         self.assertEqual(pulldom.START_ELEMENT, evt)
     88         self.assertEqual("xdc:author", node.tagName)
     89         evt, node = next(items)
     90         self.assertEqual(pulldom.CHARACTERS, evt)
     91         evt, node = next(items)
     92         self.assertEqual(pulldom.END_ELEMENT, evt)
     93         self.assertEqual("xdc:author", node.tagName)
     94         evt, node = next(items)
     95         self.assertEqual(pulldom.END_ELEMENT, evt)
     96         evt, node = next(items)
     97         self.assertEqual(pulldom.CHARACTERS, evt)
     98         evt, node = next(items)
     99         self.assertEqual(pulldom.END_ELEMENT, evt)
    100         # XXX No END_DOCUMENT item is ever obtained:
    101         #evt, node = next(items)
    102         #self.assertEqual(pulldom.END_DOCUMENT, evt)
    103 
    104     def test_expandItem(self):
    105         """Ensure expandItem works as expected."""
    106         items = pulldom.parseString(SMALL_SAMPLE)
    107         # Loop through the nodes until we get to a "title" start tag:
    108         for evt, item in items:
    109             if evt == pulldom.START_ELEMENT and item.tagName == "title":
    110                 items.expandNode(item)
    111                 self.assertEqual(1, len(item.childNodes))
    112                 break
    113         else:
    114             self.fail("No \"title\" element detected in SMALL_SAMPLE!")
    115         # Loop until we get to the next start-element:
    116         for evt, node in items:
    117             if evt == pulldom.START_ELEMENT:
    118                 break
    119         self.assertEqual("hr", node.tagName,
    120             "expandNode did not leave DOMEventStream in the correct state.")
    121         # Attempt to expand a standalone element:
    122         items.expandNode(node)
    123         self.assertEqual(next(items)[0], pulldom.CHARACTERS)
    124         evt, node = next(items)
    125         self.assertEqual(node.tagName, "p")
    126         items.expandNode(node)
    127         next(items) # Skip character data
    128         evt, node = next(items)
    129         self.assertEqual(node.tagName, "html")
    130         with self.assertRaises(StopIteration):
    131             next(items)
    132         items.clear()
    133         self.assertIsNone(items.parser)
    134         self.assertIsNone(items.stream)
    135 
    136     @unittest.expectedFailure
    137     def test_comment(self):
    138         """PullDOM does not receive "comment" events."""
    139         items = pulldom.parseString(SMALL_SAMPLE)
    140         for evt, _ in items:
    141             if evt == pulldom.COMMENT:
    142                 break
    143         else:
    144             self.fail("No comment was encountered")
    145 
    146     @unittest.expectedFailure
    147     def test_end_document(self):
    148         """PullDOM does not receive "end-document" events."""
    149         items = pulldom.parseString(SMALL_SAMPLE)
    150         # Read all of the nodes up to and including </html>:
    151         for evt, node in items:
    152             if evt == pulldom.END_ELEMENT and node.tagName == "html":
    153                 break
    154         try:
    155             # Assert that the next node is END_DOCUMENT:
    156             evt, node = next(items)
    157             self.assertEqual(pulldom.END_DOCUMENT, evt)
    158         except StopIteration:
    159             self.fail(
    160                 "Ran out of events, but should have received END_DOCUMENT")
    161 
    162 
    163 class ThoroughTestCase(unittest.TestCase):
    164     """Test the hard-to-reach parts of pulldom."""
    165 
    166     def test_thorough_parse(self):
    167         """Test some of the hard-to-reach parts of PullDOM."""
    168         self._test_thorough(pulldom.parse(None, parser=SAXExerciser()))
    169 
    170     @unittest.expectedFailure
    171     def test_sax2dom_fail(self):
    172         """SAX2DOM can"t handle a PI before the root element."""
    173         pd = SAX2DOMTestHelper(None, SAXExerciser(), 12)
    174         self._test_thorough(pd)
    175 
    176     def test_thorough_sax2dom(self):
    177         """Test some of the hard-to-reach parts of SAX2DOM."""
    178         pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
    179         self._test_thorough(pd, False)
    180 
    181     def _test_thorough(self, pd, before_root=True):
    182         """Test some of the hard-to-reach parts of the parser, using a mock
    183         parser."""
    184 
    185         evt, node = next(pd)
    186         self.assertEqual(pulldom.START_DOCUMENT, evt)
    187         # Just check the node is a Document:
    188         self.assertTrue(hasattr(node, "createElement"))
    189 
    190         if before_root:
    191             evt, node = next(pd)
    192             self.assertEqual(pulldom.COMMENT, evt)
    193             self.assertEqual("a comment", node.data)
    194             evt, node = next(pd)
    195             self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
    196             self.assertEqual("target", node.target)
    197             self.assertEqual("data", node.data)
    198 
    199         evt, node = next(pd)
    200         self.assertEqual(pulldom.START_ELEMENT, evt)
    201         self.assertEqual("html", node.tagName)
    202 
    203         evt, node = next(pd)
    204         self.assertEqual(pulldom.COMMENT, evt)
    205         self.assertEqual("a comment", node.data)
    206         evt, node = next(pd)
    207         self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt)
    208         self.assertEqual("target", node.target)
    209         self.assertEqual("data", node.data)
    210 
    211         evt, node = next(pd)
    212         self.assertEqual(pulldom.START_ELEMENT, evt)
    213         self.assertEqual("p", node.tagName)
    214 
    215         evt, node = next(pd)
    216         self.assertEqual(pulldom.CHARACTERS, evt)
    217         self.assertEqual("text", node.data)
    218         evt, node = next(pd)
    219         self.assertEqual(pulldom.END_ELEMENT, evt)
    220         self.assertEqual("p", node.tagName)
    221         evt, node = next(pd)
    222         self.assertEqual(pulldom.END_ELEMENT, evt)
    223         self.assertEqual("html", node.tagName)
    224         evt, node = next(pd)
    225         self.assertEqual(pulldom.END_DOCUMENT, evt)
    226 
    227 
    228 class SAXExerciser(object):
    229     """A fake sax parser that calls some of the harder-to-reach sax methods to
    230     ensure it emits the correct events"""
    231 
    232     def setContentHandler(self, handler):
    233         self._handler = handler
    234 
    235     def parse(self, _):
    236         h = self._handler
    237         h.startDocument()
    238 
    239         # The next two items ensure that items preceding the first
    240         # start_element are properly stored and emitted:
    241         h.comment("a comment")
    242         h.processingInstruction("target", "data")
    243 
    244         h.startElement("html", AttributesImpl({}))
    245 
    246         h.comment("a comment")
    247         h.processingInstruction("target", "data")
    248 
    249         h.startElement("p", AttributesImpl({"class": "paraclass"}))
    250         h.characters("text")
    251         h.endElement("p")
    252         h.endElement("html")
    253         h.endDocument()
    254 
    255     def stub(self, *args, **kwargs):
    256         """Stub method. Does nothing."""
    257         pass
    258     setProperty = stub
    259     setFeature = stub
    260 
    261 
    262 class SAX2DOMExerciser(SAXExerciser):
    263     """The same as SAXExerciser, but without the processing instruction and
    264     comment before the root element, because S2D can"t handle it"""
    265 
    266     def parse(self, _):
    267         h = self._handler
    268         h.startDocument()
    269         h.startElement("html", AttributesImpl({}))
    270         h.comment("a comment")
    271         h.processingInstruction("target", "data")
    272         h.startElement("p", AttributesImpl({"class": "paraclass"}))
    273         h.characters("text")
    274         h.endElement("p")
    275         h.endElement("html")
    276         h.endDocument()
    277 
    278 
    279 class SAX2DOMTestHelper(pulldom.DOMEventStream):
    280     """Allows us to drive SAX2DOM from a DOMEventStream."""
    281 
    282     def reset(self):
    283         self.pulldom = pulldom.SAX2DOM()
    284         # This content handler relies on namespace support
    285         self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
    286         self.parser.setContentHandler(self.pulldom)
    287 
    288 
    289 class SAX2DOMTestCase(unittest.TestCase):
    290 
    291     def confirm(self, test, testname="Test"):
    292         self.assertTrue(test, testname)
    293 
    294     def test_basic(self):
    295         """Ensure SAX2DOM can parse from a stream."""
    296         with io.StringIO(SMALL_SAMPLE) as fin:
    297             sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
    298                                    len(SMALL_SAMPLE))
    299             for evt, node in sd:
    300                 if evt == pulldom.START_ELEMENT and node.tagName == "html":
    301                     break
    302             # Because the buffer is the same length as the XML, all the
    303             # nodes should have been parsed and added:
    304             self.assertGreater(len(node.childNodes), 0)
    305 
    306     def testSAX2DOM(self):
    307         """Ensure SAX2DOM expands nodes as expected."""
    308         sax2dom = pulldom.SAX2DOM()
    309         sax2dom.startDocument()
    310         sax2dom.startElement("doc", {})
    311         sax2dom.characters("text")
    312         sax2dom.startElement("subelm", {})
    313         sax2dom.characters("text")
    314         sax2dom.endElement("subelm")
    315         sax2dom.characters("text")
    316         sax2dom.endElement("doc")
    317         sax2dom.endDocument()
    318 
    319         doc = sax2dom.document
    320         root = doc.documentElement
    321         (text1, elm1, text2) = root.childNodes
    322         text3 = elm1.childNodes[0]
    323 
    324         self.assertIsNone(text1.previousSibling)
    325         self.assertIs(text1.nextSibling, elm1)
    326         self.assertIs(elm1.previousSibling, text1)
    327         self.assertIs(elm1.nextSibling, text2)
    328         self.assertIs(text2.previousSibling, elm1)
    329         self.assertIsNone(text2.nextSibling)
    330         self.assertIsNone(text3.previousSibling)
    331         self.assertIsNone(text3.nextSibling)
    332 
    333         self.assertIs(root.parentNode, doc)
    334         self.assertIs(text1.parentNode, root)
    335         self.assertIs(elm1.parentNode, root)
    336         self.assertIs(text2.parentNode, root)
    337         self.assertIs(text3.parentNode, elm1)
    338         doc.unlink()
    339 
    340 
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
    343