Home | History | Annotate | Download | only in dom
      1 """Simple implementation of the Level 1 DOM.
      2 
      3 Namespaces and other minor Level 2 features are also supported.
      4 
      5 parse("foo.xml")
      6 
      7 parseString("<foo><bar/></foo>")
      8 
      9 Todo:
     10 =====
     11  * convenience methods for getting elements and text.
     12  * more testing
     13  * bring some of the writer and linearizer code into conformance with this
     14         interface
     15  * SAX 2 namespaces
     16 """
     17 
     18 import io
     19 import xml.dom
     20 
     21 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
     22 from xml.dom.minicompat import *
     23 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
     24 
     25 # This is used by the ID-cache invalidation checks; the list isn't
     26 # actually complete, since the nodes being checked will never be the
     27 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE.  (The node being checked is
     28 # the node being added or removed, not the node being modified.)
     29 #
     30 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
     31                             xml.dom.Node.ENTITY_REFERENCE_NODE)
     32 
     33 
     34 class Node(xml.dom.Node):
     35     namespaceURI = None # this is non-null only for elements and attributes
     36     parentNode = None
     37     ownerDocument = None
     38     nextSibling = None
     39     previousSibling = None
     40 
     41     prefix = EMPTY_PREFIX # non-null only for NS elements and attributes
     42 
     43     def __bool__(self):
     44         return True
     45 
     46     def toxml(self, encoding=None):
     47         return self.toprettyxml("", "", encoding)
     48 
     49     def toprettyxml(self, indent="\t", newl="\n", encoding=None):
     50         if encoding is None:
     51             writer = io.StringIO()
     52         else:
     53             writer = io.TextIOWrapper(io.BytesIO(),
     54                                       encoding=encoding,
     55                                       errors="xmlcharrefreplace",
     56                                       newline='\n')
     57         if self.nodeType == Node.DOCUMENT_NODE:
     58             # Can pass encoding only to document, to put it into XML header
     59             self.writexml(writer, "", indent, newl, encoding)
     60         else:
     61             self.writexml(writer, "", indent, newl)
     62         if encoding is None:
     63             return writer.getvalue()
     64         else:
     65             return writer.detach().getvalue()
     66 
     67     def hasChildNodes(self):
     68         return bool(self.childNodes)
     69 
     70     def _get_childNodes(self):
     71         return self.childNodes
     72 
     73     def _get_firstChild(self):
     74         if self.childNodes:
     75             return self.childNodes[0]
     76 
     77     def _get_lastChild(self):
     78         if self.childNodes:
     79             return self.childNodes[-1]
     80 
     81     def insertBefore(self, newChild, refChild):
     82         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
     83             for c in tuple(newChild.childNodes):
     84                 self.insertBefore(c, refChild)
     85             ### The DOM does not clearly specify what to return in this case
     86             return newChild
     87         if newChild.nodeType not in self._child_node_types:
     88             raise xml.dom.HierarchyRequestErr(
     89                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
     90         if newChild.parentNode is not None:
     91             newChild.parentNode.removeChild(newChild)
     92         if refChild is None:
     93             self.appendChild(newChild)
     94         else:
     95             try:
     96                 index = self.childNodes.index(refChild)
     97             except ValueError:
     98                 raise xml.dom.NotFoundErr()
     99             if newChild.nodeType in _nodeTypes_with_children:
    100                 _clear_id_cache(self)
    101             self.childNodes.insert(index, newChild)
    102             newChild.nextSibling = refChild
    103             refChild.previousSibling = newChild
    104             if index:
    105                 node = self.childNodes[index-1]
    106                 node.nextSibling = newChild
    107                 newChild.previousSibling = node
    108             else:
    109                 newChild.previousSibling = None
    110             newChild.parentNode = self
    111         return newChild
    112 
    113     def appendChild(self, node):
    114         if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
    115             for c in tuple(node.childNodes):
    116                 self.appendChild(c)
    117             ### The DOM does not clearly specify what to return in this case
    118             return node
    119         if node.nodeType not in self._child_node_types:
    120             raise xml.dom.HierarchyRequestErr(
    121                 "%s cannot be child of %s" % (repr(node), repr(self)))
    122         elif node.nodeType in _nodeTypes_with_children:
    123             _clear_id_cache(self)
    124         if node.parentNode is not None:
    125             node.parentNode.removeChild(node)
    126         _append_child(self, node)
    127         node.nextSibling = None
    128         return node
    129 
    130     def replaceChild(self, newChild, oldChild):
    131         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
    132             refChild = oldChild.nextSibling
    133             self.removeChild(oldChild)
    134             return self.insertBefore(newChild, refChild)
    135         if newChild.nodeType not in self._child_node_types:
    136             raise xml.dom.HierarchyRequestErr(
    137                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
    138         if newChild is oldChild:
    139             return
    140         if newChild.parentNode is not None:
    141             newChild.parentNode.removeChild(newChild)
    142         try:
    143             index = self.childNodes.index(oldChild)
    144         except ValueError:
    145             raise xml.dom.NotFoundErr()
    146         self.childNodes[index] = newChild
    147         newChild.parentNode = self
    148         oldChild.parentNode = None
    149         if (newChild.nodeType in _nodeTypes_with_children
    150             or oldChild.nodeType in _nodeTypes_with_children):
    151             _clear_id_cache(self)
    152         newChild.nextSibling = oldChild.nextSibling
    153         newChild.previousSibling = oldChild.previousSibling
    154         oldChild.nextSibling = None
    155         oldChild.previousSibling = None
    156         if newChild.previousSibling:
    157             newChild.previousSibling.nextSibling = newChild
    158         if newChild.nextSibling:
    159             newChild.nextSibling.previousSibling = newChild
    160         return oldChild
    161 
    162     def removeChild(self, oldChild):
    163         try:
    164             self.childNodes.remove(oldChild)
    165         except ValueError:
    166             raise xml.dom.NotFoundErr()
    167         if oldChild.nextSibling is not None:
    168             oldChild.nextSibling.previousSibling = oldChild.previousSibling
    169         if oldChild.previousSibling is not None:
    170             oldChild.previousSibling.nextSibling = oldChild.nextSibling
    171         oldChild.nextSibling = oldChild.previousSibling = None
    172         if oldChild.nodeType in _nodeTypes_with_children:
    173             _clear_id_cache(self)
    174 
    175         oldChild.parentNode = None
    176         return oldChild
    177 
    178     def normalize(self):
    179         L = []
    180         for child in self.childNodes:
    181             if child.nodeType == Node.TEXT_NODE:
    182                 if not child.data:
    183                     # empty text node; discard
    184                     if L:
    185                         L[-1].nextSibling = child.nextSibling
    186                     if child.nextSibling:
    187                         child.nextSibling.previousSibling = child.previousSibling
    188                     child.unlink()
    189                 elif L and L[-1].nodeType == child.nodeType:
    190                     # collapse text node
    191                     node = L[-1]
    192                     node.data = node.data + child.data
    193                     node.nextSibling = child.nextSibling
    194                     if child.nextSibling:
    195                         child.nextSibling.previousSibling = node
    196                     child.unlink()
    197                 else:
    198                     L.append(child)
    199             else:
    200                 L.append(child)
    201                 if child.nodeType == Node.ELEMENT_NODE:
    202                     child.normalize()
    203         self.childNodes[:] = L
    204 
    205     def cloneNode(self, deep):
    206         return _clone_node(self, deep, self.ownerDocument or self)
    207 
    208     def isSupported(self, feature, version):
    209         return self.ownerDocument.implementation.hasFeature(feature, version)
    210 
    211     def _get_localName(self):
    212         # Overridden in Element and Attr where localName can be Non-Null
    213         return None
    214 
    215     # Node interfaces from Level 3 (WD 9 April 2002)
    216 
    217     def isSameNode(self, other):
    218         return self is other
    219 
    220     def getInterface(self, feature):
    221         if self.isSupported(feature, None):
    222             return self
    223         else:
    224             return None
    225 
    226     # The "user data" functions use a dictionary that is only present
    227     # if some user data has been set, so be careful not to assume it
    228     # exists.
    229 
    230     def getUserData(self, key):
    231         try:
    232             return self._user_data[key][0]
    233         except (AttributeError, KeyError):
    234             return None
    235 
    236     def setUserData(self, key, data, handler):
    237         old = None
    238         try:
    239             d = self._user_data
    240         except AttributeError:
    241             d = {}
    242             self._user_data = d
    243         if key in d:
    244             old = d[key][0]
    245         if data is None:
    246             # ignore handlers passed for None
    247             handler = None
    248             if old is not None:
    249                 del d[key]
    250         else:
    251             d[key] = (data, handler)
    252         return old
    253 
    254     def _call_user_data_handler(self, operation, src, dst):
    255         if hasattr(self, "_user_data"):
    256             for key, (data, handler) in list(self._user_data.items()):
    257                 if handler is not None:
    258                     handler.handle(operation, key, data, src, dst)
    259 
    260     # minidom-specific API:
    261 
    262     def unlink(self):
    263         self.parentNode = self.ownerDocument = None
    264         if self.childNodes:
    265             for child in self.childNodes:
    266                 child.unlink()
    267             self.childNodes = NodeList()
    268         self.previousSibling = None
    269         self.nextSibling = None
    270 
    271     # A Node is its own context manager, to ensure that an unlink() call occurs.
    272     # This is similar to how a file object works.
    273     def __enter__(self):
    274         return self
    275 
    276     def __exit__(self, et, ev, tb):
    277         self.unlink()
    278 
    279 defproperty(Node, "firstChild", doc="First child node, or None.")
    280 defproperty(Node, "lastChild",  doc="Last child node, or None.")
    281 defproperty(Node, "localName",  doc="Namespace-local name of this node.")
    282 
    283 
    284 def _append_child(self, node):
    285     # fast path with less checks; usable by DOM builders if careful
    286     childNodes = self.childNodes
    287     if childNodes:
    288         last = childNodes[-1]
    289         node.previousSibling = last
    290         last.nextSibling = node
    291     childNodes.append(node)
    292     node.parentNode = self
    293 
    294 def _in_document(node):
    295     # return True iff node is part of a document tree
    296     while node is not None:
    297         if node.nodeType == Node.DOCUMENT_NODE:
    298             return True
    299         node = node.parentNode
    300     return False
    301 
    302 def _write_data(writer, data):
    303     "Writes datachars to writer."
    304     if data:
    305         data = data.replace("&", "&amp;").replace("<", "&lt;"). \
    306                     replace("\"", "&quot;").replace(">", "&gt;")
    307         writer.write(data)
    308 
    309 def _get_elements_by_tagName_helper(parent, name, rc):
    310     for node in parent.childNodes:
    311         if node.nodeType == Node.ELEMENT_NODE and \
    312             (name == "*" or node.tagName == name):
    313             rc.append(node)
    314         _get_elements_by_tagName_helper(node, name, rc)
    315     return rc
    316 
    317 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    318     for node in parent.childNodes:
    319         if node.nodeType == Node.ELEMENT_NODE:
    320             if ((localName == "*" or node.localName == localName) and
    321                 (nsURI == "*" or node.namespaceURI == nsURI)):
    322                 rc.append(node)
    323             _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    324     return rc
    325 
    326 class DocumentFragment(Node):
    327     nodeType = Node.DOCUMENT_FRAGMENT_NODE
    328     nodeName = "#document-fragment"
    329     nodeValue = None
    330     attributes = None
    331     parentNode = None
    332     _child_node_types = (Node.ELEMENT_NODE,
    333                          Node.TEXT_NODE,
    334                          Node.CDATA_SECTION_NODE,
    335                          Node.ENTITY_REFERENCE_NODE,
    336                          Node.PROCESSING_INSTRUCTION_NODE,
    337                          Node.COMMENT_NODE,
    338                          Node.NOTATION_NODE)
    339 
    340     def __init__(self):
    341         self.childNodes = NodeList()
    342 
    343 
    344 class Attr(Node):
    345     __slots__=('_name', '_value', 'namespaceURI',
    346                '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement')
    347     nodeType = Node.ATTRIBUTE_NODE
    348     attributes = None
    349     specified = False
    350     _is_id = False
    351 
    352     _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
    353 
    354     def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
    355                  prefix=None):
    356         self.ownerElement = None
    357         self._name = qName
    358         self.namespaceURI = namespaceURI
    359         self._prefix = prefix
    360         self.childNodes = NodeList()
    361 
    362         # Add the single child node that represents the value of the attr
    363         self.childNodes.append(Text())
    364 
    365         # nodeValue and value are set elsewhere
    366 
    367     def _get_localName(self):
    368         try:
    369             return self._localName
    370         except AttributeError:
    371             return self.nodeName.split(":", 1)[-1]
    372 
    373     def _get_specified(self):
    374         return self.specified
    375 
    376     def _get_name(self):
    377         return self._name
    378 
    379     def _set_name(self, value):
    380         self._name = value
    381         if self.ownerElement is not None:
    382             _clear_id_cache(self.ownerElement)
    383 
    384     nodeName = name = property(_get_name, _set_name)
    385 
    386     def _get_value(self):
    387         return self._value
    388 
    389     def _set_value(self, value):
    390         self._value = value
    391         self.childNodes[0].data = value
    392         if self.ownerElement is not None:
    393             _clear_id_cache(self.ownerElement)
    394         self.childNodes[0].data = value
    395 
    396     nodeValue = value = property(_get_value, _set_value)
    397 
    398     def _get_prefix(self):
    399         return self._prefix
    400 
    401     def _set_prefix(self, prefix):
    402         nsuri = self.namespaceURI
    403         if prefix == "xmlns":
    404             if nsuri and nsuri != XMLNS_NAMESPACE:
    405                 raise xml.dom.NamespaceErr(
    406                     "illegal use of 'xmlns' prefix for the wrong namespace")
    407         self._prefix = prefix
    408         if prefix is None:
    409             newName = self.localName
    410         else:
    411             newName = "%s:%s" % (prefix, self.localName)
    412         if self.ownerElement:
    413             _clear_id_cache(self.ownerElement)
    414         self.name = newName
    415 
    416     prefix = property(_get_prefix, _set_prefix)
    417 
    418     def unlink(self):
    419         # This implementation does not call the base implementation
    420         # since most of that is not needed, and the expense of the
    421         # method call is not warranted.  We duplicate the removal of
    422         # children, but that's all we needed from the base class.
    423         elem = self.ownerElement
    424         if elem is not None:
    425             del elem._attrs[self.nodeName]
    426             del elem._attrsNS[(self.namespaceURI, self.localName)]
    427             if self._is_id:
    428                 self._is_id = False
    429                 elem._magic_id_nodes -= 1
    430                 self.ownerDocument._magic_id_count -= 1
    431         for child in self.childNodes:
    432             child.unlink()
    433         del self.childNodes[:]
    434 
    435     def _get_isId(self):
    436         if self._is_id:
    437             return True
    438         doc = self.ownerDocument
    439         elem = self.ownerElement
    440         if doc is None or elem is None:
    441             return False
    442 
    443         info = doc._get_elem_info(elem)
    444         if info is None:
    445             return False
    446         if self.namespaceURI:
    447             return info.isIdNS(self.namespaceURI, self.localName)
    448         else:
    449             return info.isId(self.nodeName)
    450 
    451     def _get_schemaType(self):
    452         doc = self.ownerDocument
    453         elem = self.ownerElement
    454         if doc is None or elem is None:
    455             return _no_type
    456 
    457         info = doc._get_elem_info(elem)
    458         if info is None:
    459             return _no_type
    460         if self.namespaceURI:
    461             return info.getAttributeTypeNS(self.namespaceURI, self.localName)
    462         else:
    463             return info.getAttributeType(self.nodeName)
    464 
    465 defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
    466 defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
    467 defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
    468 
    469 
    470 class NamedNodeMap(object):
    471     """The attribute list is a transient interface to the underlying
    472     dictionaries.  Mutations here will change the underlying element's
    473     dictionary.
    474 
    475     Ordering is imposed artificially and does not reflect the order of
    476     attributes as found in an input document.
    477     """
    478 
    479     __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
    480 
    481     def __init__(self, attrs, attrsNS, ownerElement):
    482         self._attrs = attrs
    483         self._attrsNS = attrsNS
    484         self._ownerElement = ownerElement
    485 
    486     def _get_length(self):
    487         return len(self._attrs)
    488 
    489     def item(self, index):
    490         try:
    491             return self[list(self._attrs.keys())[index]]
    492         except IndexError:
    493             return None
    494 
    495     def items(self):
    496         L = []
    497         for node in self._attrs.values():
    498             L.append((node.nodeName, node.value))
    499         return L
    500 
    501     def itemsNS(self):
    502         L = []
    503         for node in self._attrs.values():
    504             L.append(((node.namespaceURI, node.localName), node.value))
    505         return L
    506 
    507     def __contains__(self, key):
    508         if isinstance(key, str):
    509             return key in self._attrs
    510         else:
    511             return key in self._attrsNS
    512 
    513     def keys(self):
    514         return self._attrs.keys()
    515 
    516     def keysNS(self):
    517         return self._attrsNS.keys()
    518 
    519     def values(self):
    520         return self._attrs.values()
    521 
    522     def get(self, name, value=None):
    523         return self._attrs.get(name, value)
    524 
    525     __len__ = _get_length
    526 
    527     def _cmp(self, other):
    528         if self._attrs is getattr(other, "_attrs", None):
    529             return 0
    530         else:
    531             return (id(self) > id(other)) - (id(self) < id(other))
    532 
    533     def __eq__(self, other):
    534         return self._cmp(other) == 0
    535 
    536     def __ge__(self, other):
    537         return self._cmp(other) >= 0
    538 
    539     def __gt__(self, other):
    540         return self._cmp(other) > 0
    541 
    542     def __le__(self, other):
    543         return self._cmp(other) <= 0
    544 
    545     def __lt__(self, other):
    546         return self._cmp(other) < 0
    547 
    548     def __getitem__(self, attname_or_tuple):
    549         if isinstance(attname_or_tuple, tuple):
    550             return self._attrsNS[attname_or_tuple]
    551         else:
    552             return self._attrs[attname_or_tuple]
    553 
    554     # same as set
    555     def __setitem__(self, attname, value):
    556         if isinstance(value, str):
    557             try:
    558                 node = self._attrs[attname]
    559             except KeyError:
    560                 node = Attr(attname)
    561                 node.ownerDocument = self._ownerElement.ownerDocument
    562                 self.setNamedItem(node)
    563             node.value = value
    564         else:
    565             if not isinstance(value, Attr):
    566                 raise TypeError("value must be a string or Attr object")
    567             node = value
    568             self.setNamedItem(node)
    569 
    570     def getNamedItem(self, name):
    571         try:
    572             return self._attrs[name]
    573         except KeyError:
    574             return None
    575 
    576     def getNamedItemNS(self, namespaceURI, localName):
    577         try:
    578             return self._attrsNS[(namespaceURI, localName)]
    579         except KeyError:
    580             return None
    581 
    582     def removeNamedItem(self, name):
    583         n = self.getNamedItem(name)
    584         if n is not None:
    585             _clear_id_cache(self._ownerElement)
    586             del self._attrs[n.nodeName]
    587             del self._attrsNS[(n.namespaceURI, n.localName)]
    588             if hasattr(n, 'ownerElement'):
    589                 n.ownerElement = None
    590             return n
    591         else:
    592             raise xml.dom.NotFoundErr()
    593 
    594     def removeNamedItemNS(self, namespaceURI, localName):
    595         n = self.getNamedItemNS(namespaceURI, localName)
    596         if n is not None:
    597             _clear_id_cache(self._ownerElement)
    598             del self._attrsNS[(n.namespaceURI, n.localName)]
    599             del self._attrs[n.nodeName]
    600             if hasattr(n, 'ownerElement'):
    601                 n.ownerElement = None
    602             return n
    603         else:
    604             raise xml.dom.NotFoundErr()
    605 
    606     def setNamedItem(self, node):
    607         if not isinstance(node, Attr):
    608             raise xml.dom.HierarchyRequestErr(
    609                 "%s cannot be child of %s" % (repr(node), repr(self)))
    610         old = self._attrs.get(node.name)
    611         if old:
    612             old.unlink()
    613         self._attrs[node.name] = node
    614         self._attrsNS[(node.namespaceURI, node.localName)] = node
    615         node.ownerElement = self._ownerElement
    616         _clear_id_cache(node.ownerElement)
    617         return old
    618 
    619     def setNamedItemNS(self, node):
    620         return self.setNamedItem(node)
    621 
    622     def __delitem__(self, attname_or_tuple):
    623         node = self[attname_or_tuple]
    624         _clear_id_cache(node.ownerElement)
    625         node.unlink()
    626 
    627     def __getstate__(self):
    628         return self._attrs, self._attrsNS, self._ownerElement
    629 
    630     def __setstate__(self, state):
    631         self._attrs, self._attrsNS, self._ownerElement = state
    632 
    633 defproperty(NamedNodeMap, "length",
    634             doc="Number of nodes in the NamedNodeMap.")
    635 
    636 AttributeList = NamedNodeMap
    637 
    638 
    639 class TypeInfo(object):
    640     __slots__ = 'namespace', 'name'
    641 
    642     def __init__(self, namespace, name):
    643         self.namespace = namespace
    644         self.name = name
    645 
    646     def __repr__(self):
    647         if self.namespace:
    648             return "<%s %r (from %r)>" % (self.__class__.__name__, self.name,
    649                                           self.namespace)
    650         else:
    651             return "<%s %r>" % (self.__class__.__name__, self.name)
    652 
    653     def _get_name(self):
    654         return self.name
    655 
    656     def _get_namespace(self):
    657         return self.namespace
    658 
    659 _no_type = TypeInfo(None, None)
    660 
    661 class Element(Node):
    662     __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix',
    663                'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS',
    664                'nextSibling', 'previousSibling')
    665     nodeType = Node.ELEMENT_NODE
    666     nodeValue = None
    667     schemaType = _no_type
    668 
    669     _magic_id_nodes = 0
    670 
    671     _child_node_types = (Node.ELEMENT_NODE,
    672                          Node.PROCESSING_INSTRUCTION_NODE,
    673                          Node.COMMENT_NODE,
    674                          Node.TEXT_NODE,
    675                          Node.CDATA_SECTION_NODE,
    676                          Node.ENTITY_REFERENCE_NODE)
    677 
    678     def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
    679                  localName=None):
    680         self.parentNode = None
    681         self.tagName = self.nodeName = tagName
    682         self.prefix = prefix
    683         self.namespaceURI = namespaceURI
    684         self.childNodes = NodeList()
    685         self.nextSibling = self.previousSibling = None
    686 
    687         # Attribute dictionaries are lazily created
    688         # attributes are double-indexed:
    689         #    tagName -> Attribute
    690         #    URI,localName -> Attribute
    691         # in the future: consider lazy generation
    692         # of attribute objects this is too tricky
    693         # for now because of headaches with
    694         # namespaces.
    695         self._attrs = None
    696         self._attrsNS = None
    697 
    698     def _ensure_attributes(self):
    699         if self._attrs is None:
    700             self._attrs = {}
    701             self._attrsNS = {}
    702 
    703     def _get_localName(self):
    704         try:
    705             return self._localName
    706         except AttributeError:
    707             return self.tagName.split(":", 1)[-1]
    708 
    709     def _get_tagName(self):
    710         return self.tagName
    711 
    712     def unlink(self):
    713         if self._attrs is not None:
    714             for attr in list(self._attrs.values()):
    715                 attr.unlink()
    716         self._attrs = None
    717         self._attrsNS = None
    718         Node.unlink(self)
    719 
    720     def getAttribute(self, attname):
    721         if self._attrs is None:
    722             return ""
    723         try:
    724             return self._attrs[attname].value
    725         except KeyError:
    726             return ""
    727 
    728     def getAttributeNS(self, namespaceURI, localName):
    729         if self._attrsNS is None:
    730             return ""
    731         try:
    732             return self._attrsNS[(namespaceURI, localName)].value
    733         except KeyError:
    734             return ""
    735 
    736     def setAttribute(self, attname, value):
    737         attr = self.getAttributeNode(attname)
    738         if attr is None:
    739             attr = Attr(attname)
    740             attr.value = value # also sets nodeValue
    741             attr.ownerDocument = self.ownerDocument
    742             self.setAttributeNode(attr)
    743         elif value != attr.value:
    744             attr.value = value
    745             if attr.isId:
    746                 _clear_id_cache(self)
    747 
    748     def setAttributeNS(self, namespaceURI, qualifiedName, value):
    749         prefix, localname = _nssplit(qualifiedName)
    750         attr = self.getAttributeNodeNS(namespaceURI, localname)
    751         if attr is None:
    752             attr = Attr(qualifiedName, namespaceURI, localname, prefix)
    753             attr.value = value
    754             attr.ownerDocument = self.ownerDocument
    755             self.setAttributeNode(attr)
    756         else:
    757             if value != attr.value:
    758                 attr.value = value
    759                 if attr.isId:
    760                     _clear_id_cache(self)
    761             if attr.prefix != prefix:
    762                 attr.prefix = prefix
    763                 attr.nodeName = qualifiedName
    764 
    765     def getAttributeNode(self, attrname):
    766         if self._attrs is None:
    767             return None
    768         return self._attrs.get(attrname)
    769 
    770     def getAttributeNodeNS(self, namespaceURI, localName):
    771         if self._attrsNS is None:
    772             return None
    773         return self._attrsNS.get((namespaceURI, localName))
    774 
    def setAttributeNode(self, attr):
        """Attach the Attr node attr to this element.

        Any attribute already stored under the same name, or under the
        same (namespaceURI, localName) pair, is removed first.

        Returns the attribute node that was replaced, or None when
        nothing was replaced (this includes the case where attr itself
        was already attached to this element, which is not a change).

        Raises xml.dom.InuseAttributeErr if attr is owned by another
        element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        self._ensure_attributes()
        old1 = self._attrs.get(attr.name, None)
        if old1 is not None:
            self.removeAttributeNode(old1)
        old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if old2 is not None and old2 is not old1:
            self.removeAttributeNode(old2)
        _set_attribute_node(self, attr)

        # Report the first node that was genuinely displaced.  The
        # name-based match takes precedence; a match found only through
        # the namespace map must still be returned rather than dropped.
        for replaced in (old1, old2):
            if replaced is not None and replaced is not attr:
                return replaced
        return None

    setAttributeNodeNS = setAttributeNode
    795 
    def removeAttribute(self, name):
        """Remove the attribute stored under name.

        Raises xml.dom.NotFoundErr when the element has no attribute
        map yet or no attribute with that name.
        """
        # Guard the same mapping that is indexed below.
        if self._attrs is None:
            raise xml.dom.NotFoundErr()
        try:
            attr = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(attr)
    804 
    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute stored under (namespaceURI, localName).

        Raises xml.dom.NotFoundErr when the element has no namespaced
        attribute map yet or no such attribute.
        """
        by_ns = self._attrsNS
        if by_ns is None:
            raise xml.dom.NotFoundErr()
        key = (namespaceURI, localName)
        try:
            target = by_ns[key]
        except KeyError:
            raise xml.dom.NotFoundErr()
        self.removeAttributeNode(target)
    813 
    def removeAttributeNode(self, node):
        """Detach the Attr node from this element and return it.

        Raises xml.dom.NotFoundErr when node is None, when the element
        has no attribute map yet, or when no attribute is stored under
        node.name.
        """
        if node is None or self._attrs is None:
            raise xml.dom.NotFoundErr()
        if node.name not in self._attrs:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        # unlink() is relied on to detach the attribute (its definition
        # lives outside this method).
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        return node

    removeAttributeNodeNS = removeAttributeNode
    828 
    def hasAttribute(self, name):
        """Return True if an attribute is stored under name."""
        by_name = self._attrs
        return by_name is not None and name in by_name
    833 
    def hasAttributeNS(self, namespaceURI, localName):
        """Return True if an attribute is stored under
        (namespaceURI, localName)."""
        by_ns = self._attrsNS
        return by_ns is not None and (namespaceURI, localName) in by_ns
    838 
    def getElementsByTagName(self, name):
        """Collect matching elements into a fresh NodeList by delegating
        to _get_elements_by_tagName_helper, and return its result."""
        found = NodeList()
        return _get_elements_by_tagName_helper(self, name, found)
    841 
    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Collect matching elements into a fresh NodeList by delegating
        to _get_elements_by_tagName_ns_helper, and return its result."""
        found = NodeList()
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, found)
    845 
    def __repr__(self):
        """Return a debug string with the tag name and object address."""
        details = (self.tagName, id(self))
        return "<DOM Element: %s at %#x>" % details
    848 
    def writexml(self, writer, indent="", addindent="", newl=""):
        """Serialize this element and its subtree to writer.

        indent    -- current indentation
        addindent -- indentation to add to higher levels
        newl      -- newline string

        Attributes are written in sorted name order.  An element whose
        only child is a text node is written on one line; an element
        without children is written as an empty-element tag.
        """
        tag = self.tagName
        writer.write(indent+"<" + tag)

        attributes = self._get_attributes()
        for key in sorted(attributes.keys()):
            writer.write(" %s=\"" % key)
            _write_data(writer, attributes[key].value)
            writer.write("\"")

        children = self.childNodes
        if not children:
            writer.write("/>%s"%(newl))
            return

        writer.write(">")
        lone_text = (len(children) == 1 and
                     children[0].nodeType == Node.TEXT_NODE)
        if lone_text:
            # Keep a single text child inline so no whitespace is added.
            children[0].writexml(writer, '', '', '')
        else:
            writer.write(newl)
            nested_indent = indent+addindent
            for child in children:
                child.writexml(writer, nested_indent, addindent, newl)
            writer.write(indent)
        writer.write("</%s>%s" % (tag, newl))
    875 
    def _get_attributes(self):
        """Return a NamedNodeMap built over this element's attribute
        dictionaries, creating those dictionaries first if needed."""
        self._ensure_attributes()
        by_name, by_ns = self._attrs, self._attrsNS
        return NamedNodeMap(by_name, by_ns, self)
    879 
    def hasAttributes(self):
        """Return True if the attribute dictionary exists and is
        non-empty, False otherwise."""
        return bool(self._attrs)
    885 
    886     # DOM Level 3 attributes, based on the 22 Oct 2002 draft
    887 
    def setIdAttribute(self, name):
        """Mark the attribute stored under name as an ID attribute.

        The lookup result (possibly None) is handed to
        setIdAttributeNode, which performs all validation.
        """
        self.setIdAttributeNode(self.getAttributeNode(name))
    891 
    def setIdAttributeNS(self, namespaceURI, localName):
        """Mark the attribute stored under (namespaceURI, localName) as
        an ID attribute.

        The lookup result (possibly None) is handed to
        setIdAttributeNode, which performs all validation.
        """
        self.setIdAttributeNode(
            self.getAttributeNodeNS(namespaceURI, localName))
    895 
    def setIdAttributeNode(self, idAttr):
        """Mark the Attr node idAttr as an ID attribute of this element.

        Raises xml.dom.NotFoundErr if idAttr is None or is not owned by
        this element, and xml.dom.NoModificationAllowedErr if this
        element sits inside an entity reference.  Marking an attribute
        that is already an ID is a no-op.
        """
        owned_here = (idAttr is not None
                      and self.isSameNode(idAttr.ownerElement))
        if not owned_here:
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if idAttr._is_id:
            return
        idAttr._is_id = True
        # Keep the per-element and per-document ID counters in step.
        self._magic_id_nodes += 1
        self.ownerDocument._magic_id_count += 1
        _clear_id_cache(self)
    906 
# Register "attributes" and "localName" as documented attributes of
# Element through the defproperty helper.
# NOTE(review): defproperty is not defined in this part of the file; it
# presumably comes from the xml.dom.minicompat star import and wires the
# names to Element._get_attributes / Element._get_localName -- confirm.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
    911 
    912 
    913 def _set_attribute_node(element, attr):
    914     _clear_id_cache(element)
    915     element._ensure_attributes()
    916     element._attrs[attr.name] = attr
    917     element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
    918 
    919     # This creates a circular reference, but Element.unlink()
    920     # breaks the cycle since the references to the attribute
    921     # dictionaries are tossed.
    922     attr.ownerElement = element
    923 
    924 class Childless:
    925     """Mixin that makes childless-ness easy to implement and avoids
    926     the complexity of the Node methods that deal with children.
    927     """
    928     __slots__ = ()
    929 
    930     attributes = None
    931     childNodes = EmptyNodeList()
    932     firstChild = None
    933     lastChild = None
    934 
    935     def _get_firstChild(self):
    936         return None
    937 
    938     def _get_lastChild(self):
    939         return None
    940 
    941     def appendChild(self, node):
    942         raise xml.dom.HierarchyRequestErr(
    943             self.nodeName + " nodes cannot have children")
    944 
    945     def hasChildNodes(self):
    946         return False
    947 
    948     def insertBefore(self, newChild, refChild):
    949         raise xml.dom.HierarchyRequestErr(
    950             self.nodeName + " nodes do not have children")
    951 
    952     def removeChild(self, oldChild):
    953         raise xml.dom.NotFoundErr(
    954             self.nodeName + " nodes do not have children")
    955 
    956     def normalize(self):
    957         # For childless nodes, normalize() has nothing to do.
    958         pass
    959 
    960     def replaceChild(self, newChild, oldChild):
    961         raise xml.dom.HierarchyRequestErr(
    962             self.nodeName + " nodes do not have children")
    963 
    964 
    965 class ProcessingInstruction(Childless, Node):
    966     nodeType = Node.PROCESSING_INSTRUCTION_NODE
    967     __slots__ = ('target', 'data')
    968 
    969     def __init__(self, target, data):
    970         self.target = target
    971         self.data = data
    972 
    973     # nodeValue is an alias for data
    974     def _get_nodeValue(self):
    975         return self.data
    976     def _set_nodeValue(self, value):
    977         self.data = value
    978     nodeValue = property(_get_nodeValue, _set_nodeValue)
    979 
    980     # nodeName is an alias for target
    981     def _get_nodeName(self):
    982         return self.target
    983     def _set_nodeName(self, value):
    984         self.target = value
    985     nodeName = property(_get_nodeName, _set_nodeName)
    986 
    987     def writexml(self, writer, indent="", addindent="", newl=""):
    988         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
    989 
    990 
    991 class CharacterData(Childless, Node):
    992     __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling')
    993 
    994     def __init__(self):
    995         self.ownerDocument = self.parentNode = None
    996         self.previousSibling = self.nextSibling = None
    997         self._data = ''
    998         Node.__init__(self)
    999 
   1000     def _get_length(self):
   1001         return len(self.data)
   1002     __len__ = _get_length
   1003 
   1004     def _get_data(self):
   1005         return self._data
   1006     def _set_data(self, data):
   1007         self._data = data
   1008 
   1009     data = nodeValue = property(_get_data, _set_data)
   1010 
   1011     def __repr__(self):
   1012         data = self.data
   1013         if len(data) > 10:
   1014             dotdotdot = "..."
   1015         else:
   1016             dotdotdot = ""
   1017         return '<DOM %s node "%r%s">' % (
   1018             self.__class__.__name__, data[0:10], dotdotdot)
   1019 
   1020     def substringData(self, offset, count):
   1021         if offset < 0:
   1022             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1023         if offset >= len(self.data):
   1024             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1025         if count < 0:
   1026             raise xml.dom.IndexSizeErr("count cannot be negative")
   1027         return self.data[offset:offset+count]
   1028 
   1029     def appendData(self, arg):
   1030         self.data = self.data + arg
   1031 
   1032     def insertData(self, offset, arg):
   1033         if offset < 0:
   1034             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1035         if offset >= len(self.data):
   1036             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1037         if arg:
   1038             self.data = "%s%s%s" % (
   1039                 self.data[:offset], arg, self.data[offset:])
   1040 
   1041     def deleteData(self, offset, count):
   1042         if offset < 0:
   1043             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1044         if offset >= len(self.data):
   1045             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1046         if count < 0:
   1047             raise xml.dom.IndexSizeErr("count cannot be negative")
   1048         if count:
   1049             self.data = self.data[:offset] + self.data[offset+count:]
   1050 
   1051     def replaceData(self, offset, count, arg):
   1052         if offset < 0:
   1053             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1054         if offset >= len(self.data):
   1055             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1056         if count < 0:
   1057             raise xml.dom.IndexSizeErr("count cannot be negative")
   1058         if count:
   1059             self.data = "%s%s%s" % (
   1060                 self.data[:offset], arg, self.data[offset+count:])
   1061 
   1062 defproperty(CharacterData, "length", doc="Length of the string data.")
   1063 
   1064 
   1065 class Text(CharacterData):
   1066     __slots__ = ()
   1067 
   1068     nodeType = Node.TEXT_NODE
   1069     nodeName = "#text"
   1070     attributes = None
   1071 
   1072     def splitText(self, offset):
   1073         if offset < 0 or offset > len(self.data):
   1074             raise xml.dom.IndexSizeErr("illegal offset value")
   1075         newText = self.__class__()
   1076         newText.data = self.data[offset:]
   1077         newText.ownerDocument = self.ownerDocument
   1078         next = self.nextSibling
   1079         if self.parentNode and self in self.parentNode.childNodes:
   1080             if next is None:
   1081                 self.parentNode.appendChild(newText)
   1082             else:
   1083                 self.parentNode.insertBefore(newText, next)
   1084         self.data = self.data[:offset]
   1085         return newText
   1086 
   1087     def writexml(self, writer, indent="", addindent="", newl=""):
   1088         _write_data(writer, "%s%s%s" % (indent, self.data, newl))
   1089 
   1090     # DOM Level 3 (WD 9 April 2002)
   1091 
   1092     def _get_wholeText(self):
   1093         L = [self.data]
   1094         n = self.previousSibling
   1095         while n is not None:
   1096             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1097                 L.insert(0, n.data)
   1098                 n = n.previousSibling
   1099             else:
   1100                 break
   1101         n = self.nextSibling
   1102         while n is not None:
   1103             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1104                 L.append(n.data)
   1105                 n = n.nextSibling
   1106             else:
   1107                 break
   1108         return ''.join(L)
   1109 
   1110     def replaceWholeText(self, content):
   1111         # XXX This needs to be seriously changed if minidom ever
   1112         # supports EntityReference nodes.
   1113         parent = self.parentNode
   1114         n = self.previousSibling
   1115         while n is not None:
   1116             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1117                 next = n.previousSibling
   1118                 parent.removeChild(n)
   1119                 n = next
   1120             else:
   1121                 break
   1122         n = self.nextSibling
   1123         if not content:
   1124             parent.removeChild(self)
   1125         while n is not None:
   1126             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1127                 next = n.nextSibling
   1128                 parent.removeChild(n)
   1129                 n = next
   1130             else:
   1131                 break
   1132         if content:
   1133             self.data = content
   1134             return self
   1135         else:
   1136             return None
   1137 
   1138     def _get_isWhitespaceInElementContent(self):
   1139         if self.data.strip():
   1140             return False
   1141         elem = _get_containing_element(self)
   1142         if elem is None:
   1143             return False
   1144         info = self.ownerDocument._get_elem_info(elem)
   1145         if info is None:
   1146             return False
   1147         else:
   1148             return info.isElementContent()
   1149 
   1150 defproperty(Text, "isWhitespaceInElementContent",
   1151             doc="True iff this text node contains only whitespace"
   1152                 " and is in element content.")
   1153 defproperty(Text, "wholeText",
   1154             doc="The text of all logically-adjacent text nodes.")
   1155 
   1156 
   1157 def _get_containing_element(node):
   1158     c = node.parentNode
   1159     while c is not None:
   1160         if c.nodeType == Node.ELEMENT_NODE:
   1161             return c
   1162         c = c.parentNode
   1163     return None
   1164 
   1165 def _get_containing_entref(node):
   1166     c = node.parentNode
   1167     while c is not None:
   1168         if c.nodeType == Node.ENTITY_REFERENCE_NODE:
   1169             return c
   1170         c = c.parentNode
   1171     return None
   1172 
   1173 
   1174 class Comment(CharacterData):
   1175     nodeType = Node.COMMENT_NODE
   1176     nodeName = "#comment"
   1177 
   1178     def __init__(self, data):
   1179         CharacterData.__init__(self)
   1180         self._data = data
   1181 
   1182     def writexml(self, writer, indent="", addindent="", newl=""):
   1183         if "--" in self.data:
   1184             raise ValueError("'--' is not allowed in a comment node")
   1185         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
   1186 
   1187 
   1188 class CDATASection(Text):
   1189     __slots__ = ()
   1190 
   1191     nodeType = Node.CDATA_SECTION_NODE
   1192     nodeName = "#cdata-section"
   1193 
   1194     def writexml(self, writer, indent="", addindent="", newl=""):
   1195         if self.data.find("]]>") >= 0:
   1196             raise ValueError("']]>' not allowed in a CDATA section")
   1197         writer.write("<![CDATA[%s]]>" % self.data)
   1198 
   1199 
   1200 class ReadOnlySequentialNamedNodeMap(object):
   1201     __slots__ = '_seq',
   1202 
   1203     def __init__(self, seq=()):
   1204         # seq should be a list or tuple
   1205         self._seq = seq
   1206 
   1207     def __len__(self):
   1208         return len(self._seq)
   1209 
   1210     def _get_length(self):
   1211         return len(self._seq)
   1212 
   1213     def getNamedItem(self, name):
   1214         for n in self._seq:
   1215             if n.nodeName == name:
   1216                 return n
   1217 
   1218     def getNamedItemNS(self, namespaceURI, localName):
   1219         for n in self._seq:
   1220             if n.namespaceURI == namespaceURI and n.localName == localName:
   1221                 return n
   1222 
   1223     def __getitem__(self, name_or_tuple):
   1224         if isinstance(name_or_tuple, tuple):
   1225             node = self.getNamedItemNS(*name_or_tuple)
   1226         else:
   1227             node = self.getNamedItem(name_or_tuple)
   1228         if node is None:
   1229             raise KeyError(name_or_tuple)
   1230         return node
   1231 
   1232     def item(self, index):
   1233         if index < 0:
   1234             return None
   1235         try:
   1236             return self._seq[index]
   1237         except IndexError:
   1238             return None
   1239 
   1240     def removeNamedItem(self, name):
   1241         raise xml.dom.NoModificationAllowedErr(
   1242             "NamedNodeMap instance is read-only")
   1243 
   1244     def removeNamedItemNS(self, namespaceURI, localName):
   1245         raise xml.dom.NoModificationAllowedErr(
   1246             "NamedNodeMap instance is read-only")
   1247 
   1248     def setNamedItem(self, node):
   1249         raise xml.dom.NoModificationAllowedErr(
   1250             "NamedNodeMap instance is read-only")
   1251 
   1252     def setNamedItemNS(self, node):
   1253         raise xml.dom.NoModificationAllowedErr(
   1254             "NamedNodeMap instance is read-only")
   1255 
   1256     def __getstate__(self):
   1257         return [self._seq]
   1258 
   1259     def __setstate__(self, state):
   1260         self._seq = state[0]
   1261 
   1262 defproperty(ReadOnlySequentialNamedNodeMap, "length",
   1263             doc="Number of entries in the NamedNodeMap.")
   1264 
   1265 
   1266 class Identified:
   1267     """Mix-in class that supports the publicId and systemId attributes."""
   1268 
   1269     __slots__ = 'publicId', 'systemId'
   1270 
   1271     def _identified_mixin_init(self, publicId, systemId):
   1272         self.publicId = publicId
   1273         self.systemId = systemId
   1274 
   1275     def _get_publicId(self):
   1276         return self.publicId
   1277 
   1278     def _get_systemId(self):
   1279         return self.systemId
   1280 
   1281 class DocumentType(Identified, Childless, Node):
   1282     nodeType = Node.DOCUMENT_TYPE_NODE
   1283     nodeValue = None
   1284     name = None
   1285     publicId = None
   1286     systemId = None
   1287     internalSubset = None
   1288 
   1289     def __init__(self, qualifiedName):
   1290         self.entities = ReadOnlySequentialNamedNodeMap()
   1291         self.notations = ReadOnlySequentialNamedNodeMap()
   1292         if qualifiedName:
   1293             prefix, localname = _nssplit(qualifiedName)
   1294             self.name = localname
   1295         self.nodeName = self.name
   1296 
   1297     def _get_internalSubset(self):
   1298         return self.internalSubset
   1299 
   1300     def cloneNode(self, deep):
   1301         if self.ownerDocument is None:
   1302             # it's ok
   1303             clone = DocumentType(None)
   1304             clone.name = self.name
   1305             clone.nodeName = self.name
   1306             operation = xml.dom.UserDataHandler.NODE_CLONED
   1307             if deep:
   1308                 clone.entities._seq = []
   1309                 clone.notations._seq = []
   1310                 for n in self.notations._seq:
   1311                     notation = Notation(n.nodeName, n.publicId, n.systemId)
   1312                     clone.notations._seq.append(notation)
   1313                     n._call_user_data_handler(operation, n, notation)
   1314                 for e in self.entities._seq:
   1315                     entity = Entity(e.nodeName, e.publicId, e.systemId,
   1316                                     e.notationName)
   1317                     entity.actualEncoding = e.actualEncoding
   1318                     entity.encoding = e.encoding
   1319                     entity.version = e.version
   1320                     clone.entities._seq.append(entity)
   1321                     e._call_user_data_handler(operation, n, entity)
   1322             self._call_user_data_handler(operation, self, clone)
   1323             return clone
   1324         else:
   1325             return None
   1326 
   1327     def writexml(self, writer, indent="", addindent="", newl=""):
   1328         writer.write("<!DOCTYPE ")
   1329         writer.write(self.name)
   1330         if self.publicId:
   1331             writer.write("%s  PUBLIC '%s'%s  '%s'"
   1332                          % (newl, self.publicId, newl, self.systemId))
   1333         elif self.systemId:
   1334             writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
   1335         if self.internalSubset is not None:
   1336             writer.write(" [")
   1337             writer.write(self.internalSubset)
   1338             writer.write("]")
   1339         writer.write(">"+newl)
   1340 
   1341 class Entity(Identified, Node):
   1342     attributes = None
   1343     nodeType = Node.ENTITY_NODE
   1344     nodeValue = None
   1345 
   1346     actualEncoding = None
   1347     encoding = None
   1348     version = None
   1349 
   1350     def __init__(self, name, publicId, systemId, notation):
   1351         self.nodeName = name
   1352         self.notationName = notation
   1353         self.childNodes = NodeList()
   1354         self._identified_mixin_init(publicId, systemId)
   1355 
   1356     def _get_actualEncoding(self):
   1357         return self.actualEncoding
   1358 
   1359     def _get_encoding(self):
   1360         return self.encoding
   1361 
   1362     def _get_version(self):
   1363         return self.version
   1364 
   1365     def appendChild(self, newChild):
   1366         raise xml.dom.HierarchyRequestErr(
   1367             "cannot append children to an entity node")
   1368 
   1369     def insertBefore(self, newChild, refChild):
   1370         raise xml.dom.HierarchyRequestErr(
   1371             "cannot insert children below an entity node")
   1372 
   1373     def removeChild(self, oldChild):
   1374         raise xml.dom.HierarchyRequestErr(
   1375             "cannot remove children from an entity node")
   1376 
   1377     def replaceChild(self, newChild, oldChild):
   1378         raise xml.dom.HierarchyRequestErr(
   1379             "cannot replace children of an entity node")
   1380 
   1381 class Notation(Identified, Childless, Node):
   1382     nodeType = Node.NOTATION_NODE
   1383     nodeValue = None
   1384 
   1385     def __init__(self, name, publicId, systemId):
   1386         self.nodeName = name
   1387         self._identified_mixin_init(publicId, systemId)
   1388 
   1389 
   1390 class DOMImplementation(DOMImplementationLS):
   1391     _features = [("core", "1.0"),
   1392                  ("core", "2.0"),
   1393                  ("core", None),
   1394                  ("xml", "1.0"),
   1395                  ("xml", "2.0"),
   1396                  ("xml", None),
   1397                  ("ls-load", "3.0"),
   1398                  ("ls-load", None),
   1399                  ]
   1400 
   1401     def hasFeature(self, feature, version):
   1402         if version == "":
   1403             version = None
   1404         return (feature.lower(), version) in self._features
   1405 
   1406     def createDocument(self, namespaceURI, qualifiedName, doctype):
   1407         if doctype and doctype.parentNode is not None:
   1408             raise xml.dom.WrongDocumentErr(
   1409                 "doctype object owned by another DOM tree")
   1410         doc = self._create_document()
   1411 
   1412         add_root_element = not (namespaceURI is None
   1413                                 and qualifiedName is None
   1414                                 and doctype is None)
   1415 
   1416         if not qualifiedName and add_root_element:
   1417             # The spec is unclear what to raise here; SyntaxErr
   1418             # would be the other obvious candidate. Since Xerces raises
   1419             # InvalidCharacterErr, and since SyntaxErr is not listed
   1420             # for createDocument, that seems to be the better choice.
   1421             # XXX: need to check for illegal characters here and in
   1422             # createElement.
   1423 
   1424             # DOM Level III clears this up when talking about the return value
   1425             # of this function.  If namespaceURI, qName and DocType are
   1426             # Null the document is returned without a document element
   1427             # Otherwise if doctype or namespaceURI are not None
   1428             # Then we go back to the above problem
   1429             raise xml.dom.InvalidCharacterErr("Element with no name")
   1430 
   1431         if add_root_element:
   1432             prefix, localname = _nssplit(qualifiedName)
   1433             if prefix == "xml" \
   1434                and namespaceURI != "http://www.w3.org/XML/1998/namespace":
   1435                 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
   1436             if prefix and not namespaceURI:
   1437                 raise xml.dom.NamespaceErr(
   1438                     "illegal use of prefix without namespaces")
   1439             element = doc.createElementNS(namespaceURI, qualifiedName)
   1440             if doctype:
   1441                 doc.appendChild(doctype)
   1442             doc.appendChild(element)
   1443 
   1444         if doctype:
   1445             doctype.parentNode = doctype.ownerDocument = doc
   1446 
   1447         doc.doctype = doctype
   1448         doc.implementation = self
   1449         return doc
   1450 
   1451     def createDocumentType(self, qualifiedName, publicId, systemId):
   1452         doctype = DocumentType(qualifiedName)
   1453         doctype.publicId = publicId
   1454         doctype.systemId = systemId
   1455         return doctype
   1456 
   1457     # DOM Level 3 (WD 9 April 2002)
   1458 
   1459     def getInterface(self, feature):
   1460         if self.hasFeature(feature, None):
   1461             return self
   1462         else:
   1463             return None
   1464 
   1465     # internal
   1466     def _create_document(self):
   1467         return Document()
   1468 
   1469 class ElementInfo(object):
   1470     """Object that represents content-model information for an element.
   1471 
   1472     This implementation is not expected to be used in practice; DOM
   1473     builders should provide implementations which do the right thing
   1474     using information available to it.
   1475 
   1476     """
   1477 
   1478     __slots__ = 'tagName',
   1479 
   1480     def __init__(self, name):
   1481         self.tagName = name
   1482 
   1483     def getAttributeType(self, aname):
   1484         return _no_type
   1485 
   1486     def getAttributeTypeNS(self, namespaceURI, localName):
   1487         return _no_type
   1488 
   1489     def isElementContent(self):
   1490         return False
   1491 
   1492     def isEmpty(self):
   1493         """Returns true iff this element is declared to have an EMPTY
   1494         content model."""
   1495         return False
   1496 
   1497     def isId(self, aname):
   1498         """Returns true iff the named attribute is a DTD-style ID."""
   1499         return False
   1500 
   1501     def isIdNS(self, namespaceURI, localName):
   1502         """Returns true iff the identified attribute is a DTD-style ID."""
   1503         return False
   1504 
   1505     def __getstate__(self):
   1506         return self.tagName
   1507 
   1508     def __setstate__(self, state):
   1509         self.tagName = state
   1510 
   1511 def _clear_id_cache(node):
   1512     if node.nodeType == Node.DOCUMENT_NODE:
   1513         node._id_cache.clear()
   1514         node._id_search_stack = None
   1515     elif _in_document(node):
   1516         node.ownerDocument._id_cache.clear()
   1517         node.ownerDocument._id_search_stack= None
   1518 
   1519 class Document(Node, DocumentLS):
   1520     __slots__ = ('_elem_info', 'doctype',
   1521                  '_id_search_stack', 'childNodes', '_id_cache')
   1522     _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
   1523                          Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
   1524 
   1525     implementation = DOMImplementation()
   1526     nodeType = Node.DOCUMENT_NODE
   1527     nodeName = "#document"
   1528     nodeValue = None
   1529     attributes = None
   1530     parentNode = None
   1531     previousSibling = nextSibling = None
   1532 
   1533 
   1534     # Document attributes from Level 3 (WD 9 April 2002)
   1535 
   1536     actualEncoding = None
   1537     encoding = None
   1538     standalone = None
   1539     version = None
   1540     strictErrorChecking = False
   1541     errorHandler = None
   1542     documentURI = None
   1543 
   1544     _magic_id_count = 0
   1545 
   1546     def __init__(self):
   1547         self.doctype = None
   1548         self.childNodes = NodeList()
   1549         # mapping of (namespaceURI, localName) -> ElementInfo
   1550         #        and tagName -> ElementInfo
   1551         self._elem_info = {}
   1552         self._id_cache = {}
   1553         self._id_search_stack = None
   1554 
   1555     def _get_elem_info(self, element):
   1556         if element.namespaceURI:
   1557             key = element.namespaceURI, element.localName
   1558         else:
   1559             key = element.tagName
   1560         return self._elem_info.get(key)
   1561 
   1562     def _get_actualEncoding(self):
   1563         return self.actualEncoding
   1564 
   1565     def _get_doctype(self):
   1566         return self.doctype
   1567 
   1568     def _get_documentURI(self):
   1569         return self.documentURI
   1570 
   1571     def _get_encoding(self):
   1572         return self.encoding
   1573 
   1574     def _get_errorHandler(self):
   1575         return self.errorHandler
   1576 
   1577     def _get_standalone(self):
   1578         return self.standalone
   1579 
   1580     def _get_strictErrorChecking(self):
   1581         return self.strictErrorChecking
   1582 
    def _get_version(self):
        """Return the value of the ``version`` attribute."""
        return self.version
   1585 
   1586     def appendChild(self, node):
   1587         if node.nodeType not in self._child_node_types:
   1588             raise xml.dom.HierarchyRequestErr(
   1589                 "%s cannot be child of %s" % (repr(node), repr(self)))
   1590         if node.parentNode is not None:
   1591             # This needs to be done before the next test since this
   1592             # may *be* the document element, in which case it should
   1593             # end up re-ordered to the end.
   1594             node.parentNode.removeChild(node)
   1595 
   1596         if node.nodeType == Node.ELEMENT_NODE \
   1597            and self._get_documentElement():
   1598             raise xml.dom.HierarchyRequestErr(
   1599                 "two document elements disallowed")
   1600         return Node.appendChild(self, node)
   1601 
   1602     def removeChild(self, oldChild):
   1603         try:
   1604             self.childNodes.remove(oldChild)
   1605         except ValueError:
   1606             raise xml.dom.NotFoundErr()
   1607         oldChild.nextSibling = oldChild.previousSibling = None
   1608         oldChild.parentNode = None
   1609         if self.documentElement is oldChild:
   1610             self.documentElement = None
   1611 
   1612         return oldChild
   1613 
   1614     def _get_documentElement(self):
   1615         for node in self.childNodes:
   1616             if node.nodeType == Node.ELEMENT_NODE:
   1617                 return node
   1618 
   1619     def unlink(self):
   1620         if self.doctype is not None:
   1621             self.doctype.unlink()
   1622             self.doctype = None
   1623         Node.unlink(self)
   1624 
   1625     def cloneNode(self, deep):
   1626         if not deep:
   1627             return None
   1628         clone = self.implementation.createDocument(None, None, None)
   1629         clone.encoding = self.encoding
   1630         clone.standalone = self.standalone
   1631         clone.version = self.version
   1632         for n in self.childNodes:
   1633             childclone = _clone_node(n, deep, clone)
   1634             assert childclone.ownerDocument.isSameNode(clone)
   1635             clone.childNodes.append(childclone)
   1636             if childclone.nodeType == Node.DOCUMENT_NODE:
   1637                 assert clone.documentElement is None
   1638             elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
   1639                 assert clone.doctype is None
   1640                 clone.doctype = childclone
   1641             childclone.parentNode = clone
   1642         self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
   1643                                      self, clone)
   1644         return clone
   1645 
   1646     def createDocumentFragment(self):
   1647         d = DocumentFragment()
   1648         d.ownerDocument = self
   1649         return d
   1650 
   1651     def createElement(self, tagName):
   1652         e = Element(tagName)
   1653         e.ownerDocument = self
   1654         return e
   1655 
   1656     def createTextNode(self, data):
   1657         if not isinstance(data, str):
   1658             raise TypeError("node contents must be a string")
   1659         t = Text()
   1660         t.data = data
   1661         t.ownerDocument = self
   1662         return t
   1663 
   1664     def createCDATASection(self, data):
   1665         if not isinstance(data, str):
   1666             raise TypeError("node contents must be a string")
   1667         c = CDATASection()
   1668         c.data = data
   1669         c.ownerDocument = self
   1670         return c
   1671 
   1672     def createComment(self, data):
   1673         c = Comment(data)
   1674         c.ownerDocument = self
   1675         return c
   1676 
   1677     def createProcessingInstruction(self, target, data):
   1678         p = ProcessingInstruction(target, data)
   1679         p.ownerDocument = self
   1680         return p
   1681 
   1682     def createAttribute(self, qName):
   1683         a = Attr(qName)
   1684         a.ownerDocument = self
   1685         a.value = ""
   1686         return a
   1687 
   1688     def createElementNS(self, namespaceURI, qualifiedName):
   1689         prefix, localName = _nssplit(qualifiedName)
   1690         e = Element(qualifiedName, namespaceURI, prefix)
   1691         e.ownerDocument = self
   1692         return e
   1693 
   1694     def createAttributeNS(self, namespaceURI, qualifiedName):
   1695         prefix, localName = _nssplit(qualifiedName)
   1696         a = Attr(qualifiedName, namespaceURI, localName, prefix)
   1697         a.ownerDocument = self
   1698         a.value = ""
   1699         return a
   1700 
   1701     # A couple of implementation-specific helpers to create node types
   1702     # not supported by the W3C DOM specs:
   1703 
   1704     def _create_entity(self, name, publicId, systemId, notationName):
   1705         e = Entity(name, publicId, systemId, notationName)
   1706         e.ownerDocument = self
   1707         return e
   1708 
   1709     def _create_notation(self, name, publicId, systemId):
   1710         n = Notation(name, publicId, systemId)
   1711         n.ownerDocument = self
   1712         return n
   1713 
   1714     def getElementById(self, id):
   1715         if id in self._id_cache:
   1716             return self._id_cache[id]
   1717         if not (self._elem_info or self._magic_id_count):
   1718             return None
   1719 
   1720         stack = self._id_search_stack
   1721         if stack is None:
   1722             # we never searched before, or the cache has been cleared
   1723             stack = [self.documentElement]
   1724             self._id_search_stack = stack
   1725         elif not stack:
   1726             # Previous search was completed and cache is still valid;
   1727             # no matching node.
   1728             return None
   1729 
   1730         result = None
   1731         while stack:
   1732             node = stack.pop()
   1733             # add child elements to stack for continued searching
   1734             stack.extend([child for child in node.childNodes
   1735                           if child.nodeType in _nodeTypes_with_children])
   1736             # check this node
   1737             info = self._get_elem_info(node)
   1738             if info:
   1739                 # We have to process all ID attributes before
   1740                 # returning in order to get all the attributes set to
   1741                 # be IDs using Element.setIdAttribute*().
   1742                 for attr in node.attributes.values():
   1743                     if attr.namespaceURI:
   1744                         if info.isIdNS(attr.namespaceURI, attr.localName):
   1745                             self._id_cache[attr.value] = node
   1746                             if attr.value == id:
   1747                                 result = node
   1748                             elif not node._magic_id_nodes:
   1749                                 break
   1750                     elif info.isId(attr.name):
   1751                         self._id_cache[attr.value] = node
   1752                         if attr.value == id:
   1753                             result = node
   1754                         elif not node._magic_id_nodes:
   1755                             break
   1756                     elif attr._is_id:
   1757                         self._id_cache[attr.value] = node
   1758                         if attr.value == id:
   1759                             result = node
   1760                         elif node._magic_id_nodes == 1:
   1761                             break
   1762             elif node._magic_id_nodes:
   1763                 for attr in node.attributes.values():
   1764                     if attr._is_id:
   1765                         self._id_cache[attr.value] = node
   1766                         if attr.value == id:
   1767                             result = node
   1768             if result is not None:
   1769                 break
   1770         return result
   1771 
   1772     def getElementsByTagName(self, name):
   1773         return _get_elements_by_tagName_helper(self, name, NodeList())
   1774 
   1775     def getElementsByTagNameNS(self, namespaceURI, localName):
   1776         return _get_elements_by_tagName_ns_helper(
   1777             self, namespaceURI, localName, NodeList())
   1778 
   1779     def isSupported(self, feature, version):
   1780         return self.implementation.hasFeature(feature, version)
   1781 
   1782     def importNode(self, node, deep):
   1783         if node.nodeType == Node.DOCUMENT_NODE:
   1784             raise xml.dom.NotSupportedErr("cannot import document nodes")
   1785         elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1786             raise xml.dom.NotSupportedErr("cannot import document type nodes")
   1787         return _clone_node(node, deep, self)
   1788 
   1789     def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
   1790         if encoding is None:
   1791             writer.write('<?xml version="1.0" ?>'+newl)
   1792         else:
   1793             writer.write('<?xml version="1.0" encoding="%s"?>%s' % (
   1794                 encoding, newl))
   1795         for node in self.childNodes:
   1796             node.writexml(writer, indent, addindent, newl)
   1797 
   1798     # DOM Level 3 (WD 9 April 2002)
   1799 
    def renameNode(self, n, namespaceURI, name):
        """Rename the element or attribute node ``n`` in place and return it.

        n -- node to rename; it must be owned by this document and be an
            element or an attribute node.
        namespaceURI -- namespace to assign (EMPTY_NAMESPACE for none).
        name -- new node name, optionally of the form "prefix:localName".

        Raises xml.dom.WrongDocumentErr if ``n`` belongs to another
        document, xml.dom.NotSupportedErr for other node types, and
        xml.dom.NamespaceErr for an illegal use of "xmlns".
        """
        if n.ownerDocument is not self:
            raise xml.dom.WrongDocumentErr(
                "cannot rename nodes from other documents;\n"
                "expected %s,\nfound %s" % (self, n.ownerDocument))
        if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
            raise xml.dom.NotSupportedErr(
                "renameNode() only applies to element and attribute nodes")
        # Work out the prefix and local name implied by the new name.
        if namespaceURI != EMPTY_NAMESPACE:
            if ':' in name:
                prefix, localName = name.split(':', 1)
                if (  prefix == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of 'xmlns' prefix")
            else:
                if (  name == "xmlns"
                      and namespaceURI != xml.dom.XMLNS_NAMESPACE
                      and n.nodeType == Node.ATTRIBUTE_NODE):
                    raise xml.dom.NamespaceErr(
                        "illegal use of the 'xmlns' attribute")
                prefix = None
                localName = name
        else:
            # No namespace: the node gets neither a prefix nor a local name.
            prefix = None
            localName = None
        if n.nodeType == Node.ATTRIBUTE_NODE:
            element = n.ownerElement
            if element is not None:
                # Remember the ID flag and detach the attribute while it
                # is renamed; it is re-attached (and re-flagged) below.
                is_id = n._is_id
                element.removeAttributeNode(n)
        else:
            element = None
        n.prefix = prefix
        n._localName = localName
        n.namespaceURI = namespaceURI
        n.nodeName = name
        if n.nodeType == Node.ELEMENT_NODE:
            n.tagName = name
        else:
            # attribute node
            n.name = name
            if element is not None:
                element.setAttributeNode(n)
                if is_id:
                    element.setIdAttributeNode(n)
        # It's not clear from a semantic perspective whether we should
        # call the user data handlers for the NODE_RENAMED event since
        # we're re-using the existing node.  The draft spec has been
        # interpreted as meaning "no, don't call the handler unless a
        # new node is created."
        return n
   1852 
# Register "documentElement" on Document through the defproperty() helper.
# NOTE(review): defproperty is not defined in this part of the file; it
# presumably comes from xml.dom.minicompat and presumably exposes
# Document._get_documentElement as the attribute -- confirm there.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
   1855 
   1856 
   1857 def _clone_node(node, deep, newOwnerDocument):
   1858     """
   1859     Clone a node and give it the new owner document.
   1860     Called by Node.cloneNode and Document.importNode
   1861     """
   1862     if node.ownerDocument.isSameNode(newOwnerDocument):
   1863         operation = xml.dom.UserDataHandler.NODE_CLONED
   1864     else:
   1865         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1866     if node.nodeType == Node.ELEMENT_NODE:
   1867         clone = newOwnerDocument.createElementNS(node.namespaceURI,
   1868                                                  node.nodeName)
   1869         for attr in node.attributes.values():
   1870             clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
   1871             a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
   1872             a.specified = attr.specified
   1873 
   1874         if deep:
   1875             for child in node.childNodes:
   1876                 c = _clone_node(child, deep, newOwnerDocument)
   1877                 clone.appendChild(c)
   1878 
   1879     elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
   1880         clone = newOwnerDocument.createDocumentFragment()
   1881         if deep:
   1882             for child in node.childNodes:
   1883                 c = _clone_node(child, deep, newOwnerDocument)
   1884                 clone.appendChild(c)
   1885 
   1886     elif node.nodeType == Node.TEXT_NODE:
   1887         clone = newOwnerDocument.createTextNode(node.data)
   1888     elif node.nodeType == Node.CDATA_SECTION_NODE:
   1889         clone = newOwnerDocument.createCDATASection(node.data)
   1890     elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
   1891         clone = newOwnerDocument.createProcessingInstruction(node.target,
   1892                                                              node.data)
   1893     elif node.nodeType == Node.COMMENT_NODE:
   1894         clone = newOwnerDocument.createComment(node.data)
   1895     elif node.nodeType == Node.ATTRIBUTE_NODE:
   1896         clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
   1897                                                    node.nodeName)
   1898         clone.specified = True
   1899         clone.value = node.value
   1900     elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1901         assert node.ownerDocument is not newOwnerDocument
   1902         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1903         clone = newOwnerDocument.implementation.createDocumentType(
   1904             node.name, node.publicId, node.systemId)
   1905         clone.ownerDocument = newOwnerDocument
   1906         if deep:
   1907             clone.entities._seq = []
   1908             clone.notations._seq = []
   1909             for n in node.notations._seq:
   1910                 notation = Notation(n.nodeName, n.publicId, n.systemId)
   1911                 notation.ownerDocument = newOwnerDocument
   1912                 clone.notations._seq.append(notation)
   1913                 if hasattr(n, '_call_user_data_handler'):
   1914                     n._call_user_data_handler(operation, n, notation)
   1915             for e in node.entities._seq:
   1916                 entity = Entity(e.nodeName, e.publicId, e.systemId,
   1917                                 e.notationName)
   1918                 entity.actualEncoding = e.actualEncoding
   1919                 entity.encoding = e.encoding
   1920                 entity.version = e.version
   1921                 entity.ownerDocument = newOwnerDocument
   1922                 clone.entities._seq.append(entity)
   1923                 if hasattr(e, '_call_user_data_handler'):
   1924                     e._call_user_data_handler(operation, n, entity)
   1925     else:
   1926         # Note the cloning of Document and DocumentType nodes is
   1927         # implementation specific.  minidom handles those cases
   1928         # directly in the cloneNode() methods.
   1929         raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
   1930 
   1931     # Check for _call_user_data_handler() since this could conceivably
   1932     # used with other DOM implementations (one of the FourThought
   1933     # DOMs, perhaps?).
   1934     if hasattr(node, '_call_user_data_handler'):
   1935         node._call_user_data_handler(operation, node, clone)
   1936     return clone
   1937 
   1938 
   1939 def _nssplit(qualifiedName):
   1940     fields = qualifiedName.split(':', 1)
   1941     if len(fields) == 2:
   1942         return fields
   1943     else:
   1944         return (None, fields[0])
   1945 
   1946 
   1947 def _do_pulldom_parse(func, args, kwargs):
   1948     events = func(*args, **kwargs)
   1949     toktype, rootNode = events.getEvent()
   1950     events.expandNode(rootNode)
   1951     events.clear()
   1952     return rootNode
   1953 
   1954 def parse(file, parser=None, bufsize=None):
   1955     """Parse a file into a DOM by filename or file object."""
   1956     if parser is None and not bufsize:
   1957         from xml.dom import expatbuilder
   1958         return expatbuilder.parse(file)
   1959     else:
   1960         from xml.dom import pulldom
   1961         return _do_pulldom_parse(pulldom.parse, (file,),
   1962             {'parser': parser, 'bufsize': bufsize})
   1963 
   1964 def parseString(string, parser=None):
   1965     """Parse a file into a DOM from a string."""
   1966     if parser is None:
   1967         from xml.dom import expatbuilder
   1968         return expatbuilder.parseString(string)
   1969     else:
   1970         from xml.dom import pulldom
   1971         return _do_pulldom_parse(pulldom.parseString, (string,),
   1972                                  {'parser': parser})
   1973 
   1974 def getDOMImplementation(features=None):
   1975     if features:
   1976         if isinstance(features, str):
   1977             features = domreg._parse_feature_string(features)
   1978         for f, v in features:
   1979             if not Document.implementation.hasFeature(f, v):
   1980                 return None
   1981     return Document.implementation
   1982