Home | History | Annotate | Download | only in treewalkers
      1 from __future__ import absolute_import, division, unicode_literals
      2 
      3 from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
      4     COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
      5 
      6 from . import _base
      7 
      8 from ..constants import voidElements
      9 
     10 
     11 class TreeWalker(_base.TreeWalker):
     12     def __iter__(self):
     13         ignore_until = None
     14         previous = None
     15         for event in self.tree:
     16             if previous is not None and \
     17                     (ignore_until is None or previous[1] is ignore_until):
     18                 if previous[1] is ignore_until:
     19                     ignore_until = None
     20                 for token in self.tokens(previous, event):
     21                     yield token
     22                     if token["type"] == "EmptyTag":
     23                         ignore_until = previous[1]
     24             previous = event
     25         if ignore_until is None or previous[1] is ignore_until:
     26             for token in self.tokens(previous, None):
     27                 yield token
     28         elif ignore_until is not None:
     29             raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
     30 
     31     def tokens(self, event, next):
     32         type, node = event
     33         if type == START_ELEMENT:
     34             name = node.nodeName
     35             namespace = node.namespaceURI
     36             attrs = {}
     37             for attr in list(node.attributes.keys()):
     38                 attr = node.getAttributeNode(attr)
     39                 attrs[(attr.namespaceURI, attr.localName)] = attr.value
     40             if name in voidElements:
     41                 for token in self.emptyTag(namespace,
     42                                            name,
     43                                            attrs,
     44                                            not next or next[1] is not node):
     45                     yield token
     46             else:
     47                 yield self.startTag(namespace, name, attrs)
     48 
     49         elif type == END_ELEMENT:
     50             name = node.nodeName
     51             namespace = node.namespaceURI
     52             if name not in voidElements:
     53                 yield self.endTag(namespace, name)
     54 
     55         elif type == COMMENT:
     56             yield self.comment(node.nodeValue)
     57 
     58         elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
     59             for token in self.text(node.nodeValue):
     60                 yield token
     61 
     62         else:
     63             yield self.unknown(type)
     64