Home | History | Annotate | Download | only in treewalkers
      1 from __future__ import absolute_import, division, unicode_literals
      2 
      3 try:
      4     from collections import OrderedDict
      5 except ImportError:
      6     try:
      7         from ordereddict import OrderedDict
      8     except ImportError:
      9         OrderedDict = dict
     10 
     11 import re
     12 
     13 from six import string_types
     14 
     15 from . import _base
     16 from ..utils import moduleFactoryFactory
     17 
     18 tag_regexp = re.compile("{([^}]*)}(.*)")
     19 
     20 
     21 def getETreeBuilder(ElementTreeImplementation):
     22     ElementTree = ElementTreeImplementation
     23     ElementTreeCommentType = ElementTree.Comment("asd").tag
     24 
     25     class TreeWalker(_base.NonRecursiveTreeWalker):
     26         """Given the particular ElementTree representation, this implementation,
     27         to avoid using recursion, returns "nodes" as tuples with the following
     28         content:
     29 
     30         1. The current element
     31 
     32         2. The index of the element relative to its parent
     33 
     34         3. A stack of ancestor elements
     35 
     36         4. A flag "text", "tail" or None to indicate if the current node is a
     37            text node; either the text or tail of the current element (1)
     38         """
     39         def getNodeDetails(self, node):
     40             if isinstance(node, tuple):  # It might be the root Element
     41                 elt, key, parents, flag = node
     42                 if flag in ("text", "tail"):
     43                     return _base.TEXT, getattr(elt, flag)
     44                 else:
     45                     node = elt
     46 
     47             if not(hasattr(node, "tag")):
     48                 node = node.getroot()
     49 
     50             if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
     51                 return (_base.DOCUMENT,)
     52 
     53             elif node.tag == "<!DOCTYPE>":
     54                 return (_base.DOCTYPE, node.text,
     55                         node.get("publicId"), node.get("systemId"))
     56 
     57             elif node.tag == ElementTreeCommentType:
     58                 return _base.COMMENT, node.text
     59 
     60             else:
     61                 assert isinstance(node.tag, string_types), type(node.tag)
     62                 # This is assumed to be an ordinary element
     63                 match = tag_regexp.match(node.tag)
     64                 if match:
     65                     namespace, tag = match.groups()
     66                 else:
     67                     namespace = None
     68                     tag = node.tag
     69                 attrs = OrderedDict()
     70                 for name, value in list(node.attrib.items()):
     71                     match = tag_regexp.match(name)
     72                     if match:
     73                         attrs[(match.group(1), match.group(2))] = value
     74                     else:
     75                         attrs[(None, name)] = value
     76                 return (_base.ELEMENT, namespace, tag,
     77                         attrs, len(node) or node.text)
     78 
     79         def getFirstChild(self, node):
     80             if isinstance(node, tuple):
     81                 element, key, parents, flag = node
     82             else:
     83                 element, key, parents, flag = node, None, [], None
     84 
     85             if flag in ("text", "tail"):
     86                 return None
     87             else:
     88                 if element.text:
     89                     return element, key, parents, "text"
     90                 elif len(element):
     91                     parents.append(element)
     92                     return element[0], 0, parents, None
     93                 else:
     94                     return None
     95 
     96         def getNextSibling(self, node):
     97             if isinstance(node, tuple):
     98                 element, key, parents, flag = node
     99             else:
    100                 return None
    101 
    102             if flag == "text":
    103                 if len(element):
    104                     parents.append(element)
    105                     return element[0], 0, parents, None
    106                 else:
    107                     return None
    108             else:
    109                 if element.tail and flag != "tail":
    110                     return element, key, parents, "tail"
    111                 elif key < len(parents[-1]) - 1:
    112                     return parents[-1][key + 1], key + 1, parents, None
    113                 else:
    114                     return None
    115 
    116         def getParentNode(self, node):
    117             if isinstance(node, tuple):
    118                 element, key, parents, flag = node
    119             else:
    120                 return None
    121 
    122             if flag == "text":
    123                 if not parents:
    124                     return element
    125                 else:
    126                     return element, key, parents, None
    127             else:
    128                 parent = parents.pop()
    129                 if not parents:
    130                     return parent
    131                 else:
    132                     return parent, list(parents[-1]).index(parent), parents, None
    133 
    134     return locals()
    135 
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory (imported
# from ..utils).  NOTE(review): presumably the wrapper turns the mapping
# returned by getETreeBuilder into a module-like object per ElementTree
# implementation -- confirm against ..utils.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
    137