1 from __future__ import absolute_import, division, unicode_literals 2 3 try: 4 from collections import OrderedDict 5 except ImportError: 6 try: 7 from ordereddict import OrderedDict 8 except ImportError: 9 OrderedDict = dict 10 11 import re 12 13 from six import string_types 14 15 from . import _base 16 from ..utils import moduleFactoryFactory 17 18 tag_regexp = re.compile("{([^}]*)}(.*)") 19 20 21 def getETreeBuilder(ElementTreeImplementation): 22 ElementTree = ElementTreeImplementation 23 ElementTreeCommentType = ElementTree.Comment("asd").tag 24 25 class TreeWalker(_base.NonRecursiveTreeWalker): 26 """Given the particular ElementTree representation, this implementation, 27 to avoid using recursion, returns "nodes" as tuples with the following 28 content: 29 30 1. The current element 31 32 2. The index of the element relative to its parent 33 34 3. A stack of ancestor elements 35 36 4. A flag "text", "tail" or None to indicate if the current node is a 37 text node; either the text or tail of the current element (1) 38 """ 39 def getNodeDetails(self, node): 40 if isinstance(node, tuple): # It might be the root Element 41 elt, key, parents, flag = node 42 if flag in ("text", "tail"): 43 return _base.TEXT, getattr(elt, flag) 44 else: 45 node = elt 46 47 if not(hasattr(node, "tag")): 48 node = node.getroot() 49 50 if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): 51 return (_base.DOCUMENT,) 52 53 elif node.tag == "<!DOCTYPE>": 54 return (_base.DOCTYPE, node.text, 55 node.get("publicId"), node.get("systemId")) 56 57 elif node.tag == ElementTreeCommentType: 58 return _base.COMMENT, node.text 59 60 else: 61 assert isinstance(node.tag, string_types), type(node.tag) 62 # This is assumed to be an ordinary element 63 match = tag_regexp.match(node.tag) 64 if match: 65 namespace, tag = match.groups() 66 else: 67 namespace = None 68 tag = node.tag 69 attrs = OrderedDict() 70 for name, value in list(node.attrib.items()): 71 match = tag_regexp.match(name) 72 if match: 73 attrs[(match.group(1), match.group(2))] = value 74 else: 75 attrs[(None, name)] = value 76 return (_base.ELEMENT, namespace, tag, 77 attrs, len(node) or node.text) 78 79 def getFirstChild(self, node): 80 if isinstance(node, tuple): 81 element, key, parents, flag = node 82 else: 83 element, key, parents, flag = node, None, [], None 84 85 if flag in ("text", "tail"): 86 return None 87 else: 88 if element.text: 89 return element, key, parents, "text" 90 elif len(element): 91 parents.append(element) 92 return element[0], 0, parents, None 93 else: 94 return None 95 96 def getNextSibling(self, node): 97 if isinstance(node, tuple): 98 element, key, parents, flag = node 99 else: 100 return None 101 102 if flag == "text": 103 if len(element): 104 parents.append(element) 105 return element[0], 0, parents, None 106 else: 107 return None 108 else: 109 if element.tail and flag != "tail": 110 return element, key, parents, "tail" 111 elif key < len(parents[-1]) - 1: 112 return parents[-1][key + 1], key + 1, parents, None 113 else: 114 return None 115 116 def getParentNode(self, node): 117 if isinstance(node, tuple): 118 element, key, parents, flag = node 119 else: 120 return None 121 122 if flag == "text": 123 if not parents: 124 return element 125 else: 126 return element, key, parents, None 127 else: 128 parent = parents.pop() 129 if not parents: 130 return parent 131 else: 132 return parent, list(parents[-1]).index(parent), parents, None 133 134 return locals() 135 136 getETreeModule = moduleFactoryFactory(getETreeBuilder) 137