from __future__ import absolute_import, division, unicode_literals

from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS

from . import _base

from ..constants import voidElements


class TreeWalker(_base.TreeWalker):
    """Tree walker that turns a stream of ``(type, node)`` events into tokens.

    ``self.tree`` is iterated as a sequence of two-item events whose first
    item is compared against the ``xml.dom.pulldom`` event constants and
    whose second item is the DOM node the event refers to.
    """

    def __iter__(self):
        """Yield the tokens for every event in ``self.tree``.

        Events are processed one step behind the iteration so that each
        event can be tokenized together with the event that follows it.

        Raises:
            ValueError: if the stream ends while events are still being
                skipped after a void element, i.e. no later event referring
                to that element's node was seen.
        """
        # Node of the void element whose follow-up events are being skipped,
        # or None when nothing is being skipped.
        ignore_until = None
        previous = None
        for event in self.tree:
            if previous is not None and \
                    (ignore_until is None or previous[1] is ignore_until):
                if previous[1] is ignore_until:
                    # Reached an event for the skipped node: stop skipping.
                    ignore_until = None
                for token in self.tokens(previous, event):
                    yield token
                    if token["type"] == "EmptyTag":
                        # Skip everything until an event for this same node
                        # shows up again.
                        ignore_until = previous[1]
            previous = event

        if previous is None:
            # Empty event stream: there is no pending event to flush, and
            # passing None to tokens() would fail while unpacking it.
            return

        # Flush the final event, which has no successor.
        if ignore_until is None or previous[1] is ignore_until:
            for token in self.tokens(previous, None):
                yield token
        else:
            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")

    def tokens(self, event, next):
        """Yield the token(s) for a single event.

        Args:
            event: ``(type, node)`` pair to tokenize.
            next: the event following ``event`` in the stream, or ``None``
                when ``event`` is the last one. Only consulted for void
                elements.
        """
        event_type, node = event
        if event_type == START_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            # Key attributes by (namespace URI, local name).
            attrs = {}
            for attr_name in list(node.attributes.keys()):
                attr = node.getAttributeNode(attr_name)
                attrs[(attr.namespaceURI, attr.localName)] = attr.value
            if name in voidElements:
                # The last argument is true when the following event is
                # missing or refers to a different node.
                # NOTE(review): presumably this is emptyTag's "has children"
                # flag -- confirm against _base.TreeWalker.emptyTag.
                for token in self.emptyTag(namespace,
                                           name,
                                           attrs,
                                           not next or next[1] is not node):
                    yield token
            else:
                yield self.startTag(namespace, name, attrs)

        elif event_type == END_ELEMENT:
            name = node.nodeName
            namespace = node.namespaceURI
            # Void elements get no end tag; their start event already
            # went through emptyTag().
            if name not in voidElements:
                yield self.endTag(namespace, name)

        elif event_type == COMMENT:
            yield self.comment(node.nodeValue)

        elif event_type in (IGNORABLE_WHITESPACE, CHARACTERS):
            for token in self.text(node.nodeValue):
                yield token

        else:
            yield self.unknown(event_type)