Home | History | Annotate | Download | only in dom
      1 """Simple implementation of the Level 1 DOM.
      2 
      3 Namespaces and other minor Level 2 features are also supported.
      4 
      5 parse("foo.xml")
      6 
      7 parseString("<foo><bar/></foo>")
      8 
      9 Todo:
     10 =====
     11  * convenience methods for getting elements and text.
     12  * more testing
     13  * bring some of the writer and linearizer code into conformance with this
     14         interface
     15  * SAX 2 namespaces
     16 """
     17 
     18 import xml.dom
     19 
     20 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
     21 from xml.dom.minicompat import *
     22 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
     23 
# Node types that trigger ID-cache invalidation when a node of that type
# is added or removed.  The list is deliberately incomplete: the node
# being checked is the one being added or removed (never the node being
# modified), so it will never be a DOCUMENT_NODE or a
# DOCUMENT_FRAGMENT_NODE.
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
     31 
     32 
     33 class Node(xml.dom.Node):
     34     namespaceURI = None # this is non-null only for elements and attributes

     35     parentNode = None
     36     ownerDocument = None
     37     nextSibling = None
     38     previousSibling = None
     39 
     40     prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

     41 
     42     def __nonzero__(self):
     43         return True
     44 
     45     def toxml(self, encoding = None):
     46         return self.toprettyxml("", "", encoding)
     47 
     48     def toprettyxml(self, indent="\t", newl="\n", encoding = None):
     49         # indent = the indentation string to prepend, per level

     50         # newl = the newline string to append

     51         writer = _get_StringIO()
     52         if encoding is not None:
     53             import codecs
     54             # Can't use codecs.getwriter to preserve 2.0 compatibility

     55             writer = codecs.lookup(encoding)[3](writer)
     56         if self.nodeType == Node.DOCUMENT_NODE:
     57             # Can pass encoding only to document, to put it into XML header

     58             self.writexml(writer, "", indent, newl, encoding)
     59         else:
     60             self.writexml(writer, "", indent, newl)
     61         return writer.getvalue()
     62 
     63     def hasChildNodes(self):
     64         if self.childNodes:
     65             return True
     66         else:
     67             return False
     68 
     69     def _get_childNodes(self):
     70         return self.childNodes
     71 
     72     def _get_firstChild(self):
     73         if self.childNodes:
     74             return self.childNodes[0]
     75 
     76     def _get_lastChild(self):
     77         if self.childNodes:
     78             return self.childNodes[-1]
     79 
     80     def insertBefore(self, newChild, refChild):
     81         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
     82             for c in tuple(newChild.childNodes):
     83                 self.insertBefore(c, refChild)
     84             ### The DOM does not clearly specify what to return in this case

     85             return newChild
     86         if newChild.nodeType not in self._child_node_types:
     87             raise xml.dom.HierarchyRequestErr(
     88                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
     89         if newChild.parentNode is not None:
     90             newChild.parentNode.removeChild(newChild)
     91         if refChild is None:
     92             self.appendChild(newChild)
     93         else:
     94             try:
     95                 index = self.childNodes.index(refChild)
     96             except ValueError:
     97                 raise xml.dom.NotFoundErr()
     98             if newChild.nodeType in _nodeTypes_with_children:
     99                 _clear_id_cache(self)
    100             self.childNodes.insert(index, newChild)
    101             newChild.nextSibling = refChild
    102             refChild.previousSibling = newChild
    103             if index:
    104                 node = self.childNodes[index-1]
    105                 node.nextSibling = newChild
    106                 newChild.previousSibling = node
    107             else:
    108                 newChild.previousSibling = None
    109             newChild.parentNode = self
    110         return newChild
    111 
    112     def appendChild(self, node):
    113         if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
    114             for c in tuple(node.childNodes):
    115                 self.appendChild(c)
    116             ### The DOM does not clearly specify what to return in this case

    117             return node
    118         if node.nodeType not in self._child_node_types:
    119             raise xml.dom.HierarchyRequestErr(
    120                 "%s cannot be child of %s" % (repr(node), repr(self)))
    121         elif node.nodeType in _nodeTypes_with_children:
    122             _clear_id_cache(self)
    123         if node.parentNode is not None:
    124             node.parentNode.removeChild(node)
    125         _append_child(self, node)
    126         node.nextSibling = None
    127         return node
    128 
    129     def replaceChild(self, newChild, oldChild):
    130         if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
    131             refChild = oldChild.nextSibling
    132             self.removeChild(oldChild)
    133             return self.insertBefore(newChild, refChild)
    134         if newChild.nodeType not in self._child_node_types:
    135             raise xml.dom.HierarchyRequestErr(
    136                 "%s cannot be child of %s" % (repr(newChild), repr(self)))
    137         if newChild is oldChild:
    138             return
    139         if newChild.parentNode is not None:
    140             newChild.parentNode.removeChild(newChild)
    141         try:
    142             index = self.childNodes.index(oldChild)
    143         except ValueError:
    144             raise xml.dom.NotFoundErr()
    145         self.childNodes[index] = newChild
    146         newChild.parentNode = self
    147         oldChild.parentNode = None
    148         if (newChild.nodeType in _nodeTypes_with_children
    149             or oldChild.nodeType in _nodeTypes_with_children):
    150             _clear_id_cache(self)
    151         newChild.nextSibling = oldChild.nextSibling
    152         newChild.previousSibling = oldChild.previousSibling
    153         oldChild.nextSibling = None
    154         oldChild.previousSibling = None
    155         if newChild.previousSibling:
    156             newChild.previousSibling.nextSibling = newChild
    157         if newChild.nextSibling:
    158             newChild.nextSibling.previousSibling = newChild
    159         return oldChild
    160 
    161     def removeChild(self, oldChild):
    162         try:
    163             self.childNodes.remove(oldChild)
    164         except ValueError:
    165             raise xml.dom.NotFoundErr()
    166         if oldChild.nextSibling is not None:
    167             oldChild.nextSibling.previousSibling = oldChild.previousSibling
    168         if oldChild.previousSibling is not None:
    169             oldChild.previousSibling.nextSibling = oldChild.nextSibling
    170         oldChild.nextSibling = oldChild.previousSibling = None
    171         if oldChild.nodeType in _nodeTypes_with_children:
    172             _clear_id_cache(self)
    173 
    174         oldChild.parentNode = None
    175         return oldChild
    176 
    177     def normalize(self):
    178         L = []
    179         for child in self.childNodes:
    180             if child.nodeType == Node.TEXT_NODE:
    181                 if not child.data:
    182                     # empty text node; discard

    183                     if L:
    184                         L[-1].nextSibling = child.nextSibling
    185                     if child.nextSibling:
    186                         child.nextSibling.previousSibling = child.previousSibling
    187                     child.unlink()
    188                 elif L and L[-1].nodeType == child.nodeType:
    189                     # collapse text node

    190                     node = L[-1]
    191                     node.data = node.data + child.data
    192                     node.nextSibling = child.nextSibling
    193                     if child.nextSibling:
    194                         child.nextSibling.previousSibling = node
    195                     child.unlink()
    196                 else:
    197                     L.append(child)
    198             else:
    199                 L.append(child)
    200                 if child.nodeType == Node.ELEMENT_NODE:
    201                     child.normalize()
    202         self.childNodes[:] = L
    203 
    204     def cloneNode(self, deep):
    205         return _clone_node(self, deep, self.ownerDocument or self)
    206 
    207     def isSupported(self, feature, version):
    208         return self.ownerDocument.implementation.hasFeature(feature, version)
    209 
    210     def _get_localName(self):
    211         # Overridden in Element and Attr where localName can be Non-Null

    212         return None
    213 
    214     # Node interfaces from Level 3 (WD 9 April 2002)

    215 
    216     def isSameNode(self, other):
    217         return self is other
    218 
    219     def getInterface(self, feature):
    220         if self.isSupported(feature, None):
    221             return self
    222         else:
    223             return None
    224 
    225     # The "user data" functions use a dictionary that is only present

    226     # if some user data has been set, so be careful not to assume it

    227     # exists.

    228 
    229     def getUserData(self, key):
    230         try:
    231             return self._user_data[key][0]
    232         except (AttributeError, KeyError):
    233             return None
    234 
    235     def setUserData(self, key, data, handler):
    236         old = None
    237         try:
    238             d = self._user_data
    239         except AttributeError:
    240             d = {}
    241             self._user_data = d
    242         if key in d:
    243             old = d[key][0]
    244         if data is None:
    245             # ignore handlers passed for None

    246             handler = None
    247             if old is not None:
    248                 del d[key]
    249         else:
    250             d[key] = (data, handler)
    251         return old
    252 
    253     def _call_user_data_handler(self, operation, src, dst):
    254         if hasattr(self, "_user_data"):
    255             for key, (data, handler) in self._user_data.items():
    256                 if handler is not None:
    257                     handler.handle(operation, key, data, src, dst)
    258 
    259     # minidom-specific API:

    260 
    261     def unlink(self):
    262         self.parentNode = self.ownerDocument = None
    263         if self.childNodes:
    264             for child in self.childNodes:
    265                 child.unlink()
    266             self.childNodes = NodeList()
    267         self.previousSibling = None
    268         self.nextSibling = None
    269 
    270 defproperty(Node, "firstChild", doc="First child node, or None.")
    271 defproperty(Node, "lastChild",  doc="Last child node, or None.")
    272 defproperty(Node, "localName",  doc="Namespace-local name of this node.")
    273 
    274 
    275 def _append_child(self, node):
    276     # fast path with less checks; usable by DOM builders if careful

    277     childNodes = self.childNodes
    278     if childNodes:
    279         last = childNodes[-1]
    280         node.__dict__["previousSibling"] = last
    281         last.__dict__["nextSibling"] = node
    282     childNodes.append(node)
    283     node.__dict__["parentNode"] = self
    284 
    285 def _in_document(node):
    286     # return True iff node is part of a document tree

    287     while node is not None:
    288         if node.nodeType == Node.DOCUMENT_NODE:
    289             return True
    290         node = node.parentNode
    291     return False
    292 
    293 def _write_data(writer, data):
    294     "Writes datachars to writer."
    295     if data:
    296         data = data.replace("&", "&amp;").replace("<", "&lt;"). \
    297                     replace("\"", "&quot;").replace(">", "&gt;")
    298         writer.write(data)
    299 
    300 def _get_elements_by_tagName_helper(parent, name, rc):
    301     for node in parent.childNodes:
    302         if node.nodeType == Node.ELEMENT_NODE and \
    303             (name == "*" or node.tagName == name):
    304             rc.append(node)
    305         _get_elements_by_tagName_helper(node, name, rc)
    306     return rc
    307 
    308 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    309     for node in parent.childNodes:
    310         if node.nodeType == Node.ELEMENT_NODE:
    311             if ((localName == "*" or node.localName == localName) and
    312                 (nsURI == "*" or node.namespaceURI == nsURI)):
    313                 rc.append(node)
    314             _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    315     return rc
    316 
    317 class DocumentFragment(Node):
    318     nodeType = Node.DOCUMENT_FRAGMENT_NODE
    319     nodeName = "#document-fragment"
    320     nodeValue = None
    321     attributes = None
    322     parentNode = None
    323     _child_node_types = (Node.ELEMENT_NODE,
    324                          Node.TEXT_NODE,
    325                          Node.CDATA_SECTION_NODE,
    326                          Node.ENTITY_REFERENCE_NODE,
    327                          Node.PROCESSING_INSTRUCTION_NODE,
    328                          Node.COMMENT_NODE,
    329                          Node.NOTATION_NODE)
    330 
    331     def __init__(self):
    332         self.childNodes = NodeList()
    333 
    334 
    335 class Attr(Node):
    336     nodeType = Node.ATTRIBUTE_NODE
    337     attributes = None
    338     ownerElement = None
    339     specified = False
    340     _is_id = False
    341 
    342     _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
    343 
    344     def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
    345                  prefix=None):
    346         # skip setattr for performance

    347         d = self.__dict__
    348         d["nodeName"] = d["name"] = qName
    349         d["namespaceURI"] = namespaceURI
    350         d["prefix"] = prefix
    351         d['childNodes'] = NodeList()
    352 
    353         # Add the single child node that represents the value of the attr

    354         self.childNodes.append(Text())
    355 
    356         # nodeValue and value are set elsewhere

    357 
    358     def _get_localName(self):
    359         return self.nodeName.split(":", 1)[-1]
    360 
    361     def _get_specified(self):
    362         return self.specified
    363 
    364     def __setattr__(self, name, value):
    365         d = self.__dict__
    366         if name in ("value", "nodeValue"):
    367             d["value"] = d["nodeValue"] = value
    368             d2 = self.childNodes[0].__dict__
    369             d2["data"] = d2["nodeValue"] = value
    370             if self.ownerElement is not None:
    371                 _clear_id_cache(self.ownerElement)
    372         elif name in ("name", "nodeName"):
    373             d["name"] = d["nodeName"] = value
    374             if self.ownerElement is not None:
    375                 _clear_id_cache(self.ownerElement)
    376         else:
    377             d[name] = value
    378 
    379     def _set_prefix(self, prefix):
    380         nsuri = self.namespaceURI
    381         if prefix == "xmlns":
    382             if nsuri and nsuri != XMLNS_NAMESPACE:
    383                 raise xml.dom.NamespaceErr(
    384                     "illegal use of 'xmlns' prefix for the wrong namespace")
    385         d = self.__dict__
    386         d['prefix'] = prefix
    387         if prefix is None:
    388             newName = self.localName
    389         else:
    390             newName = "%s:%s" % (prefix, self.localName)
    391         if self.ownerElement:
    392             _clear_id_cache(self.ownerElement)
    393         d['nodeName'] = d['name'] = newName
    394 
    395     def _set_value(self, value):
    396         d = self.__dict__
    397         d['value'] = d['nodeValue'] = value
    398         if self.ownerElement:
    399             _clear_id_cache(self.ownerElement)
    400         self.childNodes[0].data = value
    401 
    402     def unlink(self):
    403         # This implementation does not call the base implementation

    404         # since most of that is not needed, and the expense of the

    405         # method call is not warranted.  We duplicate the removal of

    406         # children, but that's all we needed from the base class.

    407         elem = self.ownerElement
    408         if elem is not None:
    409             del elem._attrs[self.nodeName]
    410             del elem._attrsNS[(self.namespaceURI, self.localName)]
    411             if self._is_id:
    412                 self._is_id = False
    413                 elem._magic_id_nodes -= 1
    414                 self.ownerDocument._magic_id_count -= 1
    415         for child in self.childNodes:
    416             child.unlink()
    417         del self.childNodes[:]
    418 
    419     def _get_isId(self):
    420         if self._is_id:
    421             return True
    422         doc = self.ownerDocument
    423         elem = self.ownerElement
    424         if doc is None or elem is None:
    425             return False
    426 
    427         info = doc._get_elem_info(elem)
    428         if info is None:
    429             return False
    430         if self.namespaceURI:
    431             return info.isIdNS(self.namespaceURI, self.localName)
    432         else:
    433             return info.isId(self.nodeName)
    434 
    435     def _get_schemaType(self):
    436         doc = self.ownerDocument
    437         elem = self.ownerElement
    438         if doc is None or elem is None:
    439             return _no_type
    440 
    441         info = doc._get_elem_info(elem)
    442         if info is None:
    443             return _no_type
    444         if self.namespaceURI:
    445             return info.getAttributeTypeNS(self.namespaceURI, self.localName)
    446         else:
    447             return info.getAttributeType(self.nodeName)
    448 
    449 defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
    450 defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
    451 defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
    452 
    453 
    454 class NamedNodeMap(object):
    455     """The attribute list is a transient interface to the underlying
    456     dictionaries.  Mutations here will change the underlying element's
    457     dictionary.
    458 
    459     Ordering is imposed artificially and does not reflect the order of
    460     attributes as found in an input document.
    461     """
    462 
    463     __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
    464 
    465     def __init__(self, attrs, attrsNS, ownerElement):
    466         self._attrs = attrs
    467         self._attrsNS = attrsNS
    468         self._ownerElement = ownerElement
    469 
    470     def _get_length(self):
    471         return len(self._attrs)
    472 
    473     def item(self, index):
    474         try:
    475             return self[self._attrs.keys()[index]]
    476         except IndexError:
    477             return None
    478 
    479     def items(self):
    480         L = []
    481         for node in self._attrs.values():
    482             L.append((node.nodeName, node.value))
    483         return L
    484 
    485     def itemsNS(self):
    486         L = []
    487         for node in self._attrs.values():
    488             L.append(((node.namespaceURI, node.localName), node.value))
    489         return L
    490 
    491     def has_key(self, key):
    492         if isinstance(key, StringTypes):
    493             return key in self._attrs
    494         else:
    495             return key in self._attrsNS
    496 
    497     def keys(self):
    498         return self._attrs.keys()
    499 
    500     def keysNS(self):
    501         return self._attrsNS.keys()
    502 
    503     def values(self):
    504         return self._attrs.values()
    505 
    506     def get(self, name, value=None):
    507         return self._attrs.get(name, value)
    508 
    509     __len__ = _get_length
    510 
    511     __hash__ = None # Mutable type can't be correctly hashed

    512     def __cmp__(self, other):
    513         if self._attrs is getattr(other, "_attrs", None):
    514             return 0
    515         else:
    516             return cmp(id(self), id(other))
    517 
    518     def __getitem__(self, attname_or_tuple):
    519         if isinstance(attname_or_tuple, tuple):
    520             return self._attrsNS[attname_or_tuple]
    521         else:
    522             return self._attrs[attname_or_tuple]
    523 
    524     # same as set

    525     def __setitem__(self, attname, value):
    526         if isinstance(value, StringTypes):
    527             try:
    528                 node = self._attrs[attname]
    529             except KeyError:
    530                 node = Attr(attname)
    531                 node.ownerDocument = self._ownerElement.ownerDocument
    532                 self.setNamedItem(node)
    533             node.value = value
    534         else:
    535             if not isinstance(value, Attr):
    536                 raise TypeError, "value must be a string or Attr object"
    537             node = value
    538             self.setNamedItem(node)
    539 
    540     def getNamedItem(self, name):
    541         try:
    542             return self._attrs[name]
    543         except KeyError:
    544             return None
    545 
    546     def getNamedItemNS(self, namespaceURI, localName):
    547         try:
    548             return self._attrsNS[(namespaceURI, localName)]
    549         except KeyError:
    550             return None
    551 
    552     def removeNamedItem(self, name):
    553         n = self.getNamedItem(name)
    554         if n is not None:
    555             _clear_id_cache(self._ownerElement)
    556             del self._attrs[n.nodeName]
    557             del self._attrsNS[(n.namespaceURI, n.localName)]
    558             if 'ownerElement' in n.__dict__:
    559                 n.__dict__['ownerElement'] = None
    560             return n
    561         else:
    562             raise xml.dom.NotFoundErr()
    563 
    564     def removeNamedItemNS(self, namespaceURI, localName):
    565         n = self.getNamedItemNS(namespaceURI, localName)
    566         if n is not None:
    567             _clear_id_cache(self._ownerElement)
    568             del self._attrsNS[(n.namespaceURI, n.localName)]
    569             del self._attrs[n.nodeName]
    570             if 'ownerElement' in n.__dict__:
    571                 n.__dict__['ownerElement'] = None
    572             return n
    573         else:
    574             raise xml.dom.NotFoundErr()
    575 
    576     def setNamedItem(self, node):
    577         if not isinstance(node, Attr):
    578             raise xml.dom.HierarchyRequestErr(
    579                 "%s cannot be child of %s" % (repr(node), repr(self)))
    580         old = self._attrs.get(node.name)
    581         if old:
    582             old.unlink()
    583         self._attrs[node.name] = node
    584         self._attrsNS[(node.namespaceURI, node.localName)] = node
    585         node.ownerElement = self._ownerElement
    586         _clear_id_cache(node.ownerElement)
    587         return old
    588 
    589     def setNamedItemNS(self, node):
    590         return self.setNamedItem(node)
    591 
    592     def __delitem__(self, attname_or_tuple):
    593         node = self[attname_or_tuple]
    594         _clear_id_cache(node.ownerElement)
    595         node.unlink()
    596 
    597     def __getstate__(self):
    598         return self._attrs, self._attrsNS, self._ownerElement
    599 
    600     def __setstate__(self, state):
    601         self._attrs, self._attrsNS, self._ownerElement = state
    602 
    603 defproperty(NamedNodeMap, "length",
    604             doc="Number of nodes in the NamedNodeMap.")
    605 
    606 AttributeList = NamedNodeMap
    607 
    608 
    609 class TypeInfo(object):
    610     __slots__ = 'namespace', 'name'
    611 
    612     def __init__(self, namespace, name):
    613         self.namespace = namespace
    614         self.name = name
    615 
    616     def __repr__(self):
    617         if self.namespace:
    618             return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
    619         else:
    620             return "<TypeInfo %r>" % self.name
    621 
    622     def _get_name(self):
    623         return self.name
    624 
    625     def _get_namespace(self):
    626         return self.namespace
    627 
    628 _no_type = TypeInfo(None, None)
    629 
    630 class Element(Node):
    631     nodeType = Node.ELEMENT_NODE
    632     nodeValue = None
    633     schemaType = _no_type
    634 
    635     _magic_id_nodes = 0
    636 
    637     _child_node_types = (Node.ELEMENT_NODE,
    638                          Node.PROCESSING_INSTRUCTION_NODE,
    639                          Node.COMMENT_NODE,
    640                          Node.TEXT_NODE,
    641                          Node.CDATA_SECTION_NODE,
    642                          Node.ENTITY_REFERENCE_NODE)
    643 
    def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
                 localName=None):
        """Create an element with no children and no attributes.

        The localName argument is accepted but not stored here.
        """
        self.tagName = tagName
        self.nodeName = tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.childNodes = NodeList()

        # Attributes are double-indexed:
        #    tagName -> Attribute
        #    URI,localName -> Attribute
        # In the future: consider lazy generation of attribute objects;
        # this is too tricky for now because of headaches with
        # namespaces.
        self._attrs = {}
        self._attrsNS = {}

    658 
    def _get_localName(self):
        # Everything after the first colon of the tag name, or the whole
        # tag name when it has no colon.
        pieces = self.tagName.split(":", 1)
        return pieces[-1]
    661 
    def _get_tagName(self):
        """Return the element's tag name."""
        name = self.tagName
        return name
    664 
    def unlink(self):
        """Unlink every attribute, drop both attribute indexes, then
        unlink the node itself."""
        # Iterate over a snapshot: unlinking an owned attribute deletes
        # it from self._attrs.
        for attribute in list(self._attrs.values()):
            attribute.unlink()
        self._attrs = self._attrsNS = None
        Node.unlink(self)
    671 
    def getAttribute(self, attname):
        """Return the value of the named attribute, or "" if it is absent."""
        try:
            node = self._attrs[attname]
        except KeyError:
            return ""
        return node.value
    677 
    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the attribute with this namespace and
        local name, or "" if it is absent."""
        key = (namespaceURI, localName)
        try:
            node = self._attrsNS[key]
        except KeyError:
            return ""
        return node.value
    683 
    def setAttribute(self, attname, value):
        """Set the attribute named attname to value, creating it if needed.

        The value is also written to the attribute's first child node,
        the same way Attr.__setattr__ does, so that the child does not
        go stale.
        """
        attr = self.getAttributeNode(attname)
        if attr is None:
            attr = Attr(attname)
            # for performance: write the instance dictionaries directly
            # instead of going through Attr.__setattr__
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            if attr.childNodes:
                d2 = attr.childNodes[0].__dict__
                d2["data"] = d2["nodeValue"] = value
            self.setAttributeNode(attr)
        elif value != attr.value:
            d = attr.__dict__
            d["value"] = d["nodeValue"] = value
            # Guarded: an attribute whose children were removed has no
            # child to update.
            if attr.childNodes:
                d2 = attr.childNodes[0].__dict__
                d2["data"] = d2["nodeValue"] = value
            if attr.isId:
                _clear_id_cache(self)
    698 
    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Set a namespaced attribute, creating it if needed.

        The attribute is looked up by namespaceURI and the local part of
        qualifiedName.  The value is also written to the attribute's
        first child node, as Attr.__setattr__ does.  When an existing
        attribute gets a different prefix, its name, nodeName and its
        entry in the qualified-name index are all updated together.
        """
        prefix, localname = _nssplit(qualifiedName)
        attr = self.getAttributeNodeNS(namespaceURI, localname)
        if attr is None:
            # for performance: write the instance dictionaries directly
            attr = Attr(qualifiedName, namespaceURI, localname, prefix)
            d = attr.__dict__
            d["prefix"] = prefix
            d["nodeName"] = qualifiedName
            d["value"] = d["nodeValue"] = value
            d["ownerDocument"] = self.ownerDocument
            if attr.childNodes:
                d2 = attr.childNodes[0].__dict__
                d2["data"] = d2["nodeValue"] = value
            self.setAttributeNode(attr)
        else:
            d = attr.__dict__
            if value != attr.value:
                d["value"] = d["nodeValue"] = value
                # Guarded: an attribute whose children were removed has
                # no child to update.
                if attr.childNodes:
                    d2 = attr.childNodes[0].__dict__
                    d2["data"] = d2["nodeValue"] = value
                if attr.isId:
                    _clear_id_cache(self)
            if attr.prefix != prefix:
                old_name = attr.nodeName
                d["prefix"] = prefix
                d["nodeName"] = d["name"] = qualifiedName
                # self._attrs is keyed by qualified name and Attr.unlink
                # deletes by nodeName, so the entry has to move with the
                # rename.
                if self._attrs.get(old_name) is attr:
                    del self._attrs[old_name]
                self._attrs[qualifiedName] = attr
                # A name change invalidates the ID cache, as it does in
                # Attr.__setattr__.
                _clear_id_cache(self)
    720 
    def getAttributeNode(self, attrname):
        """Return the attribute node with this qualified name, or None."""
        found = self._attrs.get(attrname)
        return found
    723 
    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the attribute node with this namespace and local name,
        or None."""
        key = (namespaceURI, localName)
        return self._attrsNS.get(key)
    726 
    def setAttributeNode(self, attr):
        """Attach attr to this element.

        Attributes already present under the same qualified name or the
        same (namespaceURI, localName) key are removed first.  Returns a
        displaced attribute if there was one.  Raises InuseAttributeErr
        when attr belongs to another element.
        """
        if attr.ownerElement not in (None, self):
            raise xml.dom.InuseAttributeErr("attribute node already owned")
        by_name = self._attrs.get(attr.name, None)
        if by_name is not None:
            self.removeAttributeNode(by_name)
        by_ns = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
        if by_ns is not None and by_ns is not by_name:
            self.removeAttributeNode(by_ns)
        _set_attribute_node(self, attr)

        # It might have already been part of this node, in which case
        # it doesn't represent a change, and should not be returned.
        for displaced in (by_name, by_ns):
            if displaced is not attr:
                return displaced
        return None

    setAttributeNodeNS = setAttributeNode
    746 
    def removeAttribute(self, name):
        """Remove the attribute with this qualified name.

        Raises NotFoundErr if there is no such attribute.
        """
        try:
            target = self._attrs[name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        else:
            self.removeAttributeNode(target)
    753 
    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the attribute with this namespace and local name.

        Raises NotFoundErr if there is no such attribute.
        """
        key = (namespaceURI, localName)
        try:
            target = self._attrsNS[key]
        except KeyError:
            raise xml.dom.NotFoundErr()
        else:
            self.removeAttributeNode(target)
    760 
    def removeAttributeNode(self, node):
        """Detach the attribute node from this element and return it.

        Raises NotFoundErr when node is None or when no attribute is
        registered under node.name.
        """
        if node is None:
            raise xml.dom.NotFoundErr()
        try:
            self._attrs[node.name]
        except KeyError:
            raise xml.dom.NotFoundErr()
        _clear_id_cache(self)
        node.unlink()
        # Restore this since the node is still useful and otherwise
        # unlinked
        node.ownerDocument = self.ownerDocument
        # The DOM defines the removed Attr node as the return value.
        return node

    removeAttributeNodeNS = removeAttributeNode
    775 
    def hasAttribute(self, name):
        """Return True if an attribute is stored under the qualified
        name `name`."""
        by_name = self._attrs
        return name in by_name
    778 
    def hasAttributeNS(self, namespaceURI, localName):
        """Return True if an attribute is stored under the
        (namespaceURI, localName) pair."""
        ns_key = (namespaceURI, localName)
        return ns_key in self._attrsNS
    781 
    def getElementsByTagName(self, name):
        """Run _get_elements_by_tagName_helper on this element for
        `name` with a fresh NodeList and return its result."""
        matches = NodeList()
        return _get_elements_by_tagName_helper(self, name, matches)
    784 
    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Run _get_elements_by_tagName_ns_helper on this element for
        (namespaceURI, localName) with a fresh NodeList and return its
        result."""
        matches = NodeList()
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, matches)
    788 
    def __repr__(self):
        """Return a debugging label made of the tag name and id(self)."""
        details = (self.tagName, id(self))
        return "<DOM Element: %s at %#x>" % details
    791 
    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element, its attributes and its children to
        `writer`.

        indent    -- current indentation
        addindent -- indentation to add to higher levels
        newl      -- newline string
        """
        tag = self.tagName
        writer.write(indent + "<" + tag)

        # Attributes are emitted in sorted name order.
        attrs = self._get_attributes()
        for a_name in sorted(attrs.keys()):
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")

        children = self.childNodes
        if not children:
            # No content: use the empty-element form.
            writer.write("/>%s" % (newl))
            return

        writer.write(">")
        single_text = (len(children) == 1 and
                       children[0].nodeType == Node.TEXT_NODE)
        if single_text:
            # A lone text child is written inline, with no added
            # indentation or newlines.
            children[0].writexml(writer, '', '', '')
        else:
            writer.write(newl)
            deeper = indent + addindent
            for child in children:
                child.writexml(writer, deeper, addindent, newl)
            writer.write(indent)
        writer.write("</%s>%s" % (tag, newl))
    819 
    def _get_attributes(self):
        """Return a new NamedNodeMap built from this element's two
        attribute dictionaries and the element itself."""
        by_name = self._attrs
        by_ns = self._attrsNS
        return NamedNodeMap(by_name, by_ns, self)
    822 
    def hasAttributes(self):
        """Return True if the attribute dictionary is non-empty."""
        return bool(self._attrs)
    828 
    829     # DOM Level 3 attributes, based on the 22 Oct 2002 draft

    830 
    def setIdAttribute(self, name):
        """Look up the attribute node for the qualified name `name`
        and hand it to setIdAttributeNode()."""
        self.setIdAttributeNode(self.getAttributeNode(name))
    834 
    def setIdAttributeNS(self, namespaceURI, localName):
        """Look up the attribute node for (namespaceURI, localName)
        and hand it to setIdAttributeNode()."""
        self.setIdAttributeNode(
            self.getAttributeNodeNS(namespaceURI, localName))
    838 
    def setIdAttributeNode(self, idAttr):
        """Flag the attribute node `idAttr` as an ID attribute.

        Raises xml.dom.NotFoundErr if `idAttr` is None or its
        ownerElement is not this element, and
        xml.dom.NoModificationAllowedErr if _get_containing_entref()
        finds an enclosing entity reference.
        """
        if idAttr is None:
            raise xml.dom.NotFoundErr()
        if not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if idAttr._is_id:
            # Already flagged; the counters must not be bumped again.
            return
        idAttr.__dict__['_is_id'] = True
        self._magic_id_nodes += 1
        self.ownerDocument._magic_id_count += 1
        _clear_id_cache(self)
    849 
    850 defproperty(Element, "attributes",
    851             doc="NamedNodeMap of attributes on the element.")
    852 defproperty(Element, "localName",
    853             doc="Namespace-local name of this element.")
    854 
    855 
    856 def _set_attribute_node(element, attr):
    857     _clear_id_cache(element)
    858     element._attrs[attr.name] = attr
    859     element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
    860 
    861     # This creates a circular reference, but Element.unlink()

    862     # breaks the cycle since the references to the attribute

    863     # dictionaries are tossed.

    864     attr.__dict__['ownerElement'] = element
    865 
    866 
    867 class Childless:
    868     """Mixin that makes childless-ness easy to implement and avoids
    869     the complexity of the Node methods that deal with children.
    870     """
    871 
    872     attributes = None
    873     childNodes = EmptyNodeList()
    874     firstChild = None
    875     lastChild = None
    876 
    877     def _get_firstChild(self):
    878         return None
    879 
    880     def _get_lastChild(self):
    881         return None
    882 
    883     def appendChild(self, node):
    884         raise xml.dom.HierarchyRequestErr(
    885             self.nodeName + " nodes cannot have children")
    886 
    887     def hasChildNodes(self):
    888         return False
    889 
    890     def insertBefore(self, newChild, refChild):
    891         raise xml.dom.HierarchyRequestErr(
    892             self.nodeName + " nodes do not have children")
    893 
    894     def removeChild(self, oldChild):
    895         raise xml.dom.NotFoundErr(
    896             self.nodeName + " nodes do not have children")
    897 
    898     def normalize(self):
    899         # For childless nodes, normalize() has nothing to do.

    900         pass
    901 
    902     def replaceChild(self, newChild, oldChild):
    903         raise xml.dom.HierarchyRequestErr(
    904             self.nodeName + " nodes do not have children")
    905 
    906 
    907 class ProcessingInstruction(Childless, Node):
    908     nodeType = Node.PROCESSING_INSTRUCTION_NODE
    909 
    910     def __init__(self, target, data):
    911         self.target = self.nodeName = target
    912         self.data = self.nodeValue = data
    913 
    914     def _get_data(self):
    915         return self.data
    916     def _set_data(self, value):
    917         d = self.__dict__
    918         d['data'] = d['nodeValue'] = value
    919 
    920     def _get_target(self):
    921         return self.target
    922     def _set_target(self, value):
    923         d = self.__dict__
    924         d['target'] = d['nodeName'] = value
    925 
    926     def __setattr__(self, name, value):
    927         if name == "data" or name == "nodeValue":
    928             self.__dict__['data'] = self.__dict__['nodeValue'] = value
    929         elif name == "target" or name == "nodeName":
    930             self.__dict__['target'] = self.__dict__['nodeName'] = value
    931         else:
    932             self.__dict__[name] = value
    933 
    934     def writexml(self, writer, indent="", addindent="", newl=""):
    935         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
    936 
    937 
    938 class CharacterData(Childless, Node):
    939     def _get_length(self):
    940         return len(self.data)
    941     __len__ = _get_length
    942 
    943     def _get_data(self):
    944         return self.__dict__['data']
    945     def _set_data(self, data):
    946         d = self.__dict__
    947         d['data'] = d['nodeValue'] = data
    948 
    949     _get_nodeValue = _get_data
    950     _set_nodeValue = _set_data
    951 
    952     def __setattr__(self, name, value):
    953         if name == "data" or name == "nodeValue":
    954             self.__dict__['data'] = self.__dict__['nodeValue'] = value
    955         else:
    956             self.__dict__[name] = value
    957 
    958     def __repr__(self):
    959         data = self.data
    960         if len(data) > 10:
    961             dotdotdot = "..."
    962         else:
    963             dotdotdot = ""
    964         return '<DOM %s node "%r%s">' % (
    965             self.__class__.__name__, data[0:10], dotdotdot)
    966 
    967     def substringData(self, offset, count):
    968         if offset < 0:
    969             raise xml.dom.IndexSizeErr("offset cannot be negative")
    970         if offset >= len(self.data):
    971             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    972         if count < 0:
    973             raise xml.dom.IndexSizeErr("count cannot be negative")
    974         return self.data[offset:offset+count]
    975 
    976     def appendData(self, arg):
    977         self.data = self.data + arg
    978 
    979     def insertData(self, offset, arg):
    980         if offset < 0:
    981             raise xml.dom.IndexSizeErr("offset cannot be negative")
    982         if offset >= len(self.data):
    983             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    984         if arg:
    985             self.data = "%s%s%s" % (
    986                 self.data[:offset], arg, self.data[offset:])
    987 
    988     def deleteData(self, offset, count):
    989         if offset < 0:
    990             raise xml.dom.IndexSizeErr("offset cannot be negative")
    991         if offset >= len(self.data):
    992             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    993         if count < 0:
    994             raise xml.dom.IndexSizeErr("count cannot be negative")
    995         if count:
    996             self.data = self.data[:offset] + self.data[offset+count:]
    997 
    998     def replaceData(self, offset, count, arg):
    999         if offset < 0:
   1000             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1001         if offset >= len(self.data):
   1002             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1003         if count < 0:
   1004             raise xml.dom.IndexSizeErr("count cannot be negative")
   1005         if count:
   1006             self.data = "%s%s%s" % (
   1007                 self.data[:offset], arg, self.data[offset+count:])
   1008 
   1009 defproperty(CharacterData, "length", doc="Length of the string data.")
   1010 
   1011 
   1012 class Text(CharacterData):
   1013     # Make sure we don't add an instance __dict__ if we don't already

   1014     # have one, at least when that's possible:

   1015     # XXX this does not work, CharacterData is an old-style class

   1016     # __slots__ = ()

   1017 
   1018     nodeType = Node.TEXT_NODE
   1019     nodeName = "#text"
   1020     attributes = None
   1021 
   1022     def splitText(self, offset):
   1023         if offset < 0 or offset > len(self.data):
   1024             raise xml.dom.IndexSizeErr("illegal offset value")
   1025         newText = self.__class__()
   1026         newText.data = self.data[offset:]
   1027         newText.ownerDocument = self.ownerDocument
   1028         next = self.nextSibling
   1029         if self.parentNode and self in self.parentNode.childNodes:
   1030             if next is None:
   1031                 self.parentNode.appendChild(newText)
   1032             else:
   1033                 self.parentNode.insertBefore(newText, next)
   1034         self.data = self.data[:offset]
   1035         return newText
   1036 
   1037     def writexml(self, writer, indent="", addindent="", newl=""):
   1038         _write_data(writer, "%s%s%s" % (indent, self.data, newl))
   1039 
   1040     # DOM Level 3 (WD 9 April 2002)

   1041 
   1042     def _get_wholeText(self):
   1043         L = [self.data]
   1044         n = self.previousSibling
   1045         while n is not None:
   1046             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1047                 L.insert(0, n.data)
   1048                 n = n.previousSibling
   1049             else:
   1050                 break
   1051         n = self.nextSibling
   1052         while n is not None:
   1053             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1054                 L.append(n.data)
   1055                 n = n.nextSibling
   1056             else:
   1057                 break
   1058         return ''.join(L)
   1059 
   1060     def replaceWholeText(self, content):
   1061         # XXX This needs to be seriously changed if minidom ever

   1062         # supports EntityReference nodes.

   1063         parent = self.parentNode
   1064         n = self.previousSibling
   1065         while n is not None:
   1066             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1067                 next = n.previousSibling
   1068                 parent.removeChild(n)
   1069                 n = next
   1070             else:
   1071                 break
   1072         n = self.nextSibling
   1073         if not content:
   1074             parent.removeChild(self)
   1075         while n is not None:
   1076             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1077                 next = n.nextSibling
   1078                 parent.removeChild(n)
   1079                 n = next
   1080             else:
   1081                 break
   1082         if content:
   1083             d = self.__dict__
   1084             d['data'] = content
   1085             d['nodeValue'] = content
   1086             return self
   1087         else:
   1088             return None
   1089 
   1090     def _get_isWhitespaceInElementContent(self):
   1091         if self.data.strip():
   1092             return False
   1093         elem = _get_containing_element(self)
   1094         if elem is None:
   1095             return False
   1096         info = self.ownerDocument._get_elem_info(elem)
   1097         if info is None:
   1098             return False
   1099         else:
   1100             return info.isElementContent()
   1101 
   1102 defproperty(Text, "isWhitespaceInElementContent",
   1103             doc="True iff this text node contains only whitespace"
   1104                 " and is in element content.")
   1105 defproperty(Text, "wholeText",
   1106             doc="The text of all logically-adjacent text nodes.")
   1107 
   1108 
   1109 def _get_containing_element(node):
   1110     c = node.parentNode
   1111     while c is not None:
   1112         if c.nodeType == Node.ELEMENT_NODE:
   1113             return c
   1114         c = c.parentNode
   1115     return None
   1116 
   1117 def _get_containing_entref(node):
   1118     c = node.parentNode
   1119     while c is not None:
   1120         if c.nodeType == Node.ENTITY_REFERENCE_NODE:
   1121             return c
   1122         c = c.parentNode
   1123     return None
   1124 
   1125 
   1126 class Comment(Childless, CharacterData):
   1127     nodeType = Node.COMMENT_NODE
   1128     nodeName = "#comment"
   1129 
   1130     def __init__(self, data):
   1131         self.data = self.nodeValue = data
   1132 
   1133     def writexml(self, writer, indent="", addindent="", newl=""):
   1134         if "--" in self.data:
   1135             raise ValueError("'--' is not allowed in a comment node")
   1136         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
   1137 
   1138 
   1139 class CDATASection(Text):
   1140     # Make sure we don't add an instance __dict__ if we don't already

   1141     # have one, at least when that's possible:

   1142     # XXX this does not work, Text is an old-style class

   1143     # __slots__ = ()

   1144 
   1145     nodeType = Node.CDATA_SECTION_NODE
   1146     nodeName = "#cdata-section"
   1147 
   1148     def writexml(self, writer, indent="", addindent="", newl=""):
   1149         if self.data.find("]]>") >= 0:
   1150             raise ValueError("']]>' not allowed in a CDATA section")
   1151         writer.write("<![CDATA[%s]]>" % self.data)
   1152 
   1153 
   1154 class ReadOnlySequentialNamedNodeMap(object):
   1155     __slots__ = '_seq',
   1156 
   1157     def __init__(self, seq=()):
   1158         # seq should be a list or tuple

   1159         self._seq = seq
   1160 
   1161     def __len__(self):
   1162         return len(self._seq)
   1163 
   1164     def _get_length(self):
   1165         return len(self._seq)
   1166 
   1167     def getNamedItem(self, name):
   1168         for n in self._seq:
   1169             if n.nodeName == name:
   1170                 return n
   1171 
   1172     def getNamedItemNS(self, namespaceURI, localName):
   1173         for n in self._seq:
   1174             if n.namespaceURI == namespaceURI and n.localName == localName:
   1175                 return n
   1176 
   1177     def __getitem__(self, name_or_tuple):
   1178         if isinstance(name_or_tuple, tuple):
   1179             node = self.getNamedItemNS(*name_or_tuple)
   1180         else:
   1181             node = self.getNamedItem(name_or_tuple)
   1182         if node is None:
   1183             raise KeyError, name_or_tuple
   1184         return node
   1185 
   1186     def item(self, index):
   1187         if index < 0:
   1188             return None
   1189         try:
   1190             return self._seq[index]
   1191         except IndexError:
   1192             return None
   1193 
   1194     def removeNamedItem(self, name):
   1195         raise xml.dom.NoModificationAllowedErr(
   1196             "NamedNodeMap instance is read-only")
   1197 
   1198     def removeNamedItemNS(self, namespaceURI, localName):
   1199         raise xml.dom.NoModificationAllowedErr(
   1200             "NamedNodeMap instance is read-only")
   1201 
   1202     def setNamedItem(self, node):
   1203         raise xml.dom.NoModificationAllowedErr(
   1204             "NamedNodeMap instance is read-only")
   1205 
   1206     def setNamedItemNS(self, node):
   1207         raise xml.dom.NoModificationAllowedErr(
   1208             "NamedNodeMap instance is read-only")
   1209 
   1210     def __getstate__(self):
   1211         return [self._seq]
   1212 
   1213     def __setstate__(self, state):
   1214         self._seq = state[0]
   1215 
   1216 defproperty(ReadOnlySequentialNamedNodeMap, "length",
   1217             doc="Number of entries in the NamedNodeMap.")
   1218 
   1219 
   1220 class Identified:
   1221     """Mix-in class that supports the publicId and systemId attributes."""
   1222 
   1223     # XXX this does not work, this is an old-style class

   1224     # __slots__ = 'publicId', 'systemId'

   1225 
   1226     def _identified_mixin_init(self, publicId, systemId):
   1227         self.publicId = publicId
   1228         self.systemId = systemId
   1229 
   1230     def _get_publicId(self):
   1231         return self.publicId
   1232 
   1233     def _get_systemId(self):
   1234         return self.systemId
   1235 
   1236 class DocumentType(Identified, Childless, Node):
   1237     nodeType = Node.DOCUMENT_TYPE_NODE
   1238     nodeValue = None
   1239     name = None
   1240     publicId = None
   1241     systemId = None
   1242     internalSubset = None
   1243 
   1244     def __init__(self, qualifiedName):
   1245         self.entities = ReadOnlySequentialNamedNodeMap()
   1246         self.notations = ReadOnlySequentialNamedNodeMap()
   1247         if qualifiedName:
   1248             prefix, localname = _nssplit(qualifiedName)
   1249             self.name = localname
   1250         self.nodeName = self.name
   1251 
   1252     def _get_internalSubset(self):
   1253         return self.internalSubset
   1254 
   1255     def cloneNode(self, deep):
   1256         if self.ownerDocument is None:
   1257             # it's ok

   1258             clone = DocumentType(None)
   1259             clone.name = self.name
   1260             clone.nodeName = self.name
   1261             operation = xml.dom.UserDataHandler.NODE_CLONED
   1262             if deep:
   1263                 clone.entities._seq = []
   1264                 clone.notations._seq = []
   1265                 for n in self.notations._seq:
   1266                     notation = Notation(n.nodeName, n.publicId, n.systemId)
   1267                     clone.notations._seq.append(notation)
   1268                     n._call_user_data_handler(operation, n, notation)
   1269                 for e in self.entities._seq:
   1270                     entity = Entity(e.nodeName, e.publicId, e.systemId,
   1271                                     e.notationName)
   1272                     entity.actualEncoding = e.actualEncoding
   1273                     entity.encoding = e.encoding
   1274                     entity.version = e.version
   1275                     clone.entities._seq.append(entity)
   1276                     e._call_user_data_handler(operation, n, entity)
   1277             self._call_user_data_handler(operation, self, clone)
   1278             return clone
   1279         else:
   1280             return None
   1281 
   1282     def writexml(self, writer, indent="", addindent="", newl=""):
   1283         writer.write("<!DOCTYPE ")
   1284         writer.write(self.name)
   1285         if self.publicId:
   1286             writer.write("%s  PUBLIC '%s'%s  '%s'"
   1287                          % (newl, self.publicId, newl, self.systemId))
   1288         elif self.systemId:
   1289             writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
   1290         if self.internalSubset is not None:
   1291             writer.write(" [")
   1292             writer.write(self.internalSubset)
   1293             writer.write("]")
   1294         writer.write(">"+newl)
   1295 
   1296 class Entity(Identified, Node):
   1297     attributes = None
   1298     nodeType = Node.ENTITY_NODE
   1299     nodeValue = None
   1300 
   1301     actualEncoding = None
   1302     encoding = None
   1303     version = None
   1304 
   1305     def __init__(self, name, publicId, systemId, notation):
   1306         self.nodeName = name
   1307         self.notationName = notation
   1308         self.childNodes = NodeList()
   1309         self._identified_mixin_init(publicId, systemId)
   1310 
   1311     def _get_actualEncoding(self):
   1312         return self.actualEncoding
   1313 
   1314     def _get_encoding(self):
   1315         return self.encoding
   1316 
   1317     def _get_version(self):
   1318         return self.version
   1319 
   1320     def appendChild(self, newChild):
   1321         raise xml.dom.HierarchyRequestErr(
   1322             "cannot append children to an entity node")
   1323 
   1324     def insertBefore(self, newChild, refChild):
   1325         raise xml.dom.HierarchyRequestErr(
   1326             "cannot insert children below an entity node")
   1327 
   1328     def removeChild(self, oldChild):
   1329         raise xml.dom.HierarchyRequestErr(
   1330             "cannot remove children from an entity node")
   1331 
   1332     def replaceChild(self, newChild, oldChild):
   1333         raise xml.dom.HierarchyRequestErr(
   1334             "cannot replace children of an entity node")
   1335 
   1336 class Notation(Identified, Childless, Node):
   1337     nodeType = Node.NOTATION_NODE
   1338     nodeValue = None
   1339 
   1340     def __init__(self, name, publicId, systemId):
   1341         self.nodeName = name
   1342         self._identified_mixin_init(publicId, systemId)
   1343 
   1344 
   1345 class DOMImplementation(DOMImplementationLS):
   1346     _features = [("core", "1.0"),
   1347                  ("core", "2.0"),
   1348                  ("core", None),
   1349                  ("xml", "1.0"),
   1350                  ("xml", "2.0"),
   1351                  ("xml", None),
   1352                  ("ls-load", "3.0"),
   1353                  ("ls-load", None),
   1354                  ]
   1355 
   1356     def hasFeature(self, feature, version):
   1357         if version == "":
   1358             version = None
   1359         return (feature.lower(), version) in self._features
   1360 
   1361     def createDocument(self, namespaceURI, qualifiedName, doctype):
   1362         if doctype and doctype.parentNode is not None:
   1363             raise xml.dom.WrongDocumentErr(
   1364                 "doctype object owned by another DOM tree")
   1365         doc = self._create_document()
   1366 
   1367         add_root_element = not (namespaceURI is None
   1368                                 and qualifiedName is None
   1369                                 and doctype is None)
   1370 
   1371         if not qualifiedName and add_root_element:
   1372             # The spec is unclear what to raise here; SyntaxErr

   1373             # would be the other obvious candidate. Since Xerces raises

   1374             # InvalidCharacterErr, and since SyntaxErr is not listed

   1375             # for createDocument, that seems to be the better choice.

   1376             # XXX: need to check for illegal characters here and in

   1377             # createElement.

   1378 
   1379             # DOM Level III clears this up when talking about the return value

   1380             # of this function.  If namespaceURI, qName and DocType are

   1381             # Null the document is returned without a document element

   1382             # Otherwise if doctype or namespaceURI are not None

   1383             # Then we go back to the above problem

   1384             raise xml.dom.InvalidCharacterErr("Element with no name")
   1385 
   1386         if add_root_element:
   1387             prefix, localname = _nssplit(qualifiedName)
   1388             if prefix == "xml" \
   1389                and namespaceURI != "http://www.w3.org/XML/1998/namespace":
   1390                 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
   1391             if prefix and not namespaceURI:
   1392                 raise xml.dom.NamespaceErr(
   1393                     "illegal use of prefix without namespaces")
   1394             element = doc.createElementNS(namespaceURI, qualifiedName)
   1395             if doctype:
   1396                 doc.appendChild(doctype)
   1397             doc.appendChild(element)
   1398 
   1399         if doctype:
   1400             doctype.parentNode = doctype.ownerDocument = doc
   1401 
   1402         doc.doctype = doctype
   1403         doc.implementation = self
   1404         return doc
   1405 
   1406     def createDocumentType(self, qualifiedName, publicId, systemId):
   1407         doctype = DocumentType(qualifiedName)
   1408         doctype.publicId = publicId
   1409         doctype.systemId = systemId
   1410         return doctype
   1411 
   1412     # DOM Level 3 (WD 9 April 2002)

   1413 
   1414     def getInterface(self, feature):
   1415         if self.hasFeature(feature, None):
   1416             return self
   1417         else:
   1418             return None
   1419 
   1420     # internal

   1421     def _create_document(self):
   1422         return Document()
   1423 
   1424 class ElementInfo(object):
   1425     """Object that represents content-model information for an element.
   1426 
   1427     This implementation is not expected to be used in practice; DOM
   1428     builders should provide implementations which do the right thing
   1429     using information available to it.
   1430 
   1431     """
   1432 
   1433     __slots__ = 'tagName',
   1434 
   1435     def __init__(self, name):
   1436         self.tagName = name
   1437 
   1438     def getAttributeType(self, aname):
   1439         return _no_type
   1440 
   1441     def getAttributeTypeNS(self, namespaceURI, localName):
   1442         return _no_type
   1443 
   1444     def isElementContent(self):
   1445         return False
   1446 
   1447     def isEmpty(self):
   1448         """Returns true iff this element is declared to have an EMPTY
   1449         content model."""
   1450         return False
   1451 
   1452     def isId(self, aname):
   1453         """Returns true iff the named attribute is a DTD-style ID."""
   1454         return False
   1455 
   1456     def isIdNS(self, namespaceURI, localName):
   1457         """Returns true iff the identified attribute is a DTD-style ID."""
   1458         return False
   1459 
   1460     def __getstate__(self):
   1461         return self.tagName
   1462 
   1463     def __setstate__(self, state):
   1464         self.tagName = state
   1465 
   1466 def _clear_id_cache(node):
   1467     if node.nodeType == Node.DOCUMENT_NODE:
   1468         node._id_cache.clear()
   1469         node._id_search_stack = None
   1470     elif _in_document(node):
   1471         node.ownerDocument._id_cache.clear()
   1472         node.ownerDocument._id_search_stack= None
   1473 
   1474 class Document(Node, DocumentLS):
   1475     _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
   1476                          Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
   1477 
   1478     nodeType = Node.DOCUMENT_NODE
   1479     nodeName = "#document"
   1480     nodeValue = None
   1481     attributes = None
   1482     doctype = None
   1483     parentNode = None
   1484     previousSibling = nextSibling = None
   1485 
   1486     implementation = DOMImplementation()
   1487 
   1488     # Document attributes from Level 3 (WD 9 April 2002)

   1489 
   1490     actualEncoding = None
   1491     encoding = None
   1492     standalone = None
   1493     version = None
   1494     strictErrorChecking = False
   1495     errorHandler = None
   1496     documentURI = None
   1497 
   1498     _magic_id_count = 0
   1499 
   1500     def __init__(self):
   1501         self.childNodes = NodeList()
   1502         # mapping of (namespaceURI, localName) -> ElementInfo

   1503         #        and tagName -> ElementInfo

   1504         self._elem_info = {}
   1505         self._id_cache = {}
   1506         self._id_search_stack = None
   1507 
   1508     def _get_elem_info(self, element):
   1509         if element.namespaceURI:
   1510             key = element.namespaceURI, element.localName
   1511         else:
   1512             key = element.tagName
   1513         return self._elem_info.get(key)
   1514 
   1515     def _get_actualEncoding(self):
   1516         return self.actualEncoding
   1517 
   1518     def _get_doctype(self):
   1519         return self.doctype
   1520 
   1521     def _get_documentURI(self):
   1522         return self.documentURI
   1523 
   1524     def _get_encoding(self):
   1525         return self.encoding
   1526 
   1527     def _get_errorHandler(self):
   1528         return self.errorHandler
   1529 
   1530     def _get_standalone(self):
   1531         return self.standalone
   1532 
   1533     def _get_strictErrorChecking(self):
   1534         return self.strictErrorChecking
   1535 
   1536     def _get_version(self):
   1537         return self.version
   1538 
   1539     def appendChild(self, node):
   1540         if node.nodeType not in self._child_node_types:
   1541             raise xml.dom.HierarchyRequestErr(
   1542                 "%s cannot be child of %s" % (repr(node), repr(self)))
   1543         if node.parentNode is not None:
   1544             # This needs to be done before the next test since this

   1545             # may *be* the document element, in which case it should

   1546             # end up re-ordered to the end.

   1547             node.parentNode.removeChild(node)
   1548 
   1549         if node.nodeType == Node.ELEMENT_NODE \
   1550            and self._get_documentElement():
   1551             raise xml.dom.HierarchyRequestErr(
   1552                 "two document elements disallowed")
   1553         return Node.appendChild(self, node)
   1554 
   1555     def removeChild(self, oldChild):
   1556         try:
   1557             self.childNodes.remove(oldChild)
   1558         except ValueError:
   1559             raise xml.dom.NotFoundErr()
   1560         oldChild.nextSibling = oldChild.previousSibling = None
   1561         oldChild.parentNode = None
   1562         if self.documentElement is oldChild:
   1563             self.documentElement = None
   1564 
   1565         return oldChild
   1566 
   1567     def _get_documentElement(self):
   1568         for node in self.childNodes:
   1569             if node.nodeType == Node.ELEMENT_NODE:
   1570                 return node
   1571 
   1572     def unlink(self):
   1573         if self.doctype is not None:
   1574             self.doctype.unlink()
   1575             self.doctype = None
   1576         Node.unlink(self)
   1577 
   1578     def cloneNode(self, deep):
   1579         if not deep:
   1580             return None
   1581         clone = self.implementation.createDocument(None, None, None)
   1582         clone.encoding = self.encoding
   1583         clone.standalone = self.standalone
   1584         clone.version = self.version
   1585         for n in self.childNodes:
   1586             childclone = _clone_node(n, deep, clone)
   1587             assert childclone.ownerDocument.isSameNode(clone)
   1588             clone.childNodes.append(childclone)
   1589             if childclone.nodeType == Node.DOCUMENT_NODE:
   1590                 assert clone.documentElement is None
   1591             elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
   1592                 assert clone.doctype is None
   1593                 clone.doctype = childclone
   1594             childclone.parentNode = clone
   1595         self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
   1596                                      self, clone)
   1597         return clone
   1598 
   1599     def createDocumentFragment(self):
   1600         d = DocumentFragment()
   1601         d.ownerDocument = self
   1602         return d
   1603 
   1604     def createElement(self, tagName):
   1605         e = Element(tagName)
   1606         e.ownerDocument = self
   1607         return e
   1608 
   1609     def createTextNode(self, data):
   1610         if not isinstance(data, StringTypes):
   1611             raise TypeError, "node contents must be a string"
   1612         t = Text()
   1613         t.data = data
   1614         t.ownerDocument = self
   1615         return t
   1616 
   1617     def createCDATASection(self, data):
   1618         if not isinstance(data, StringTypes):
   1619             raise TypeError, "node contents must be a string"
   1620         c = CDATASection()
   1621         c.data = data
   1622         c.ownerDocument = self
   1623         return c
   1624 
   1625     def createComment(self, data):
   1626         c = Comment(data)
   1627         c.ownerDocument = self
   1628         return c
   1629 
   1630     def createProcessingInstruction(self, target, data):
   1631         p = ProcessingInstruction(target, data)
   1632         p.ownerDocument = self
   1633         return p
   1634 
   1635     def createAttribute(self, qName):
   1636         a = Attr(qName)
   1637         a.ownerDocument = self
   1638         a.value = ""
   1639         return a
   1640 
   1641     def createElementNS(self, namespaceURI, qualifiedName):
   1642         prefix, localName = _nssplit(qualifiedName)
   1643         e = Element(qualifiedName, namespaceURI, prefix)
   1644         e.ownerDocument = self
   1645         return e
   1646 
   1647     def createAttributeNS(self, namespaceURI, qualifiedName):
   1648         prefix, localName = _nssplit(qualifiedName)
   1649         a = Attr(qualifiedName, namespaceURI, localName, prefix)
   1650         a.ownerDocument = self
   1651         a.value = ""
   1652         return a
   1653 
   1654     # A couple of implementation-specific helpers to create node types

   1655     # not supported by the W3C DOM specs:

   1656 
   1657     def _create_entity(self, name, publicId, systemId, notationName):
   1658         e = Entity(name, publicId, systemId, notationName)
   1659         e.ownerDocument = self
   1660         return e
   1661 
   1662     def _create_notation(self, name, publicId, systemId):
   1663         n = Notation(name, publicId, systemId)
   1664         n.ownerDocument = self
   1665         return n
   1666 
   1667     def getElementById(self, id):
   1668         if id in self._id_cache:
   1669             return self._id_cache[id]
   1670         if not (self._elem_info or self._magic_id_count):
   1671             return None
   1672 
   1673         stack = self._id_search_stack
   1674         if stack is None:
   1675             # we never searched before, or the cache has been cleared

   1676             stack = [self.documentElement]
   1677             self._id_search_stack = stack
   1678         elif not stack:
   1679             # Previous search was completed and cache is still valid;

   1680             # no matching node.

   1681             return None
   1682 
   1683         result = None
   1684         while stack:
   1685             node = stack.pop()
   1686             # add child elements to stack for continued searching

   1687             stack.extend([child for child in node.childNodes
   1688                           if child.nodeType in _nodeTypes_with_children])
   1689             # check this node

   1690             info = self._get_elem_info(node)
   1691             if info:
   1692                 # We have to process all ID attributes before

   1693                 # returning in order to get all the attributes set to

   1694                 # be IDs using Element.setIdAttribute*().

   1695                 for attr in node.attributes.values():
   1696                     if attr.namespaceURI:
   1697                         if info.isIdNS(attr.namespaceURI, attr.localName):
   1698                             self._id_cache[attr.value] = node
   1699                             if attr.value == id:
   1700                                 result = node
   1701                             elif not node._magic_id_nodes:
   1702                                 break
   1703                     elif info.isId(attr.name):
   1704                         self._id_cache[attr.value] = node
   1705                         if attr.value == id:
   1706                             result = node
   1707                         elif not node._magic_id_nodes:
   1708                             break
   1709                     elif attr._is_id:
   1710                         self._id_cache[attr.value] = node
   1711                         if attr.value == id:
   1712                             result = node
   1713                         elif node._magic_id_nodes == 1:
   1714                             break
   1715             elif node._magic_id_nodes:
   1716                 for attr in node.attributes.values():
   1717                     if attr._is_id:
   1718                         self._id_cache[attr.value] = node
   1719                         if attr.value == id:
   1720                             result = node
   1721             if result is not None:
   1722                 break
   1723         return result
   1724 
   1725     def getElementsByTagName(self, name):
   1726         return _get_elements_by_tagName_helper(self, name, NodeList())
   1727 
   1728     def getElementsByTagNameNS(self, namespaceURI, localName):
   1729         return _get_elements_by_tagName_ns_helper(
   1730             self, namespaceURI, localName, NodeList())
   1731 
   1732     def isSupported(self, feature, version):
   1733         return self.implementation.hasFeature(feature, version)
   1734 
   1735     def importNode(self, node, deep):
   1736         if node.nodeType == Node.DOCUMENT_NODE:
   1737             raise xml.dom.NotSupportedErr("cannot import document nodes")
   1738         elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1739             raise xml.dom.NotSupportedErr("cannot import document type nodes")
   1740         return _clone_node(node, deep, self)
   1741 
   1742     def writexml(self, writer, indent="", addindent="", newl="",
   1743                  encoding = None):
   1744         if encoding is None:
   1745             writer.write('<?xml version="1.0" ?>'+newl)
   1746         else:
   1747             writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
   1748         for node in self.childNodes:
   1749             node.writexml(writer, indent, addindent, newl)
   1750 
   1751     # DOM Level 3 (WD 9 April 2002)

   1752 
   1753     def renameNode(self, n, namespaceURI, name):
   1754         if n.ownerDocument is not self:
   1755             raise xml.dom.WrongDocumentErr(
   1756                 "cannot rename nodes from other documents;\n"
   1757                 "expected %s,\nfound %s" % (self, n.ownerDocument))
   1758         if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
   1759             raise xml.dom.NotSupportedErr(
   1760                 "renameNode() only applies to element and attribute nodes")
   1761         if namespaceURI != EMPTY_NAMESPACE:
   1762             if ':' in name:
   1763                 prefix, localName = name.split(':', 1)
   1764                 if (  prefix == "xmlns"
   1765                       and namespaceURI != xml.dom.XMLNS_NAMESPACE):
   1766                     raise xml.dom.NamespaceErr(
   1767                         "illegal use of 'xmlns' prefix")
   1768             else:
   1769                 if (  name == "xmlns"
   1770                       and namespaceURI != xml.dom.XMLNS_NAMESPACE
   1771                       and n.nodeType == Node.ATTRIBUTE_NODE):
   1772                     raise xml.dom.NamespaceErr(
   1773                         "illegal use of the 'xmlns' attribute")
   1774                 prefix = None
   1775                 localName = name
   1776         else:
   1777             prefix = None
   1778             localName = None
   1779         if n.nodeType == Node.ATTRIBUTE_NODE:
   1780             element = n.ownerElement
   1781             if element is not None:
   1782                 is_id = n._is_id
   1783                 element.removeAttributeNode(n)
   1784         else:
   1785             element = None
   1786         # avoid __setattr__

   1787         d = n.__dict__
   1788         d['prefix'] = prefix
   1789         d['localName'] = localName
   1790         d['namespaceURI'] = namespaceURI
   1791         d['nodeName'] = name
   1792         if n.nodeType == Node.ELEMENT_NODE:
   1793             d['tagName'] = name
   1794         else:
   1795             # attribute node

   1796             d['name'] = name
   1797             if element is not None:
   1798                 element.setAttributeNode(n)
   1799                 if is_id:
   1800                     element.setIdAttributeNode(n)
   1801         # It's not clear from a semantic perspective whether we should

   1802         # call the user data handlers for the NODE_RENAMED event since

   1803         # we're re-using the existing node.  The draft spec has been

   1804         # interpreted as meaning "no, don't call the handler unless a

   1805         # new node is created."

   1806         return n
   1807 
   1808 defproperty(Document, "documentElement",
   1809             doc="Top-level element of this document.")
   1810 
   1811 
   1812 def _clone_node(node, deep, newOwnerDocument):
   1813     """
   1814     Clone a node and give it the new owner document.
   1815     Called by Node.cloneNode and Document.importNode
   1816     """
   1817     if node.ownerDocument.isSameNode(newOwnerDocument):
   1818         operation = xml.dom.UserDataHandler.NODE_CLONED
   1819     else:
   1820         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1821     if node.nodeType == Node.ELEMENT_NODE:
   1822         clone = newOwnerDocument.createElementNS(node.namespaceURI,
   1823                                                  node.nodeName)
   1824         for attr in node.attributes.values():
   1825             clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
   1826             a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
   1827             a.specified = attr.specified
   1828 
   1829         if deep:
   1830             for child in node.childNodes:
   1831                 c = _clone_node(child, deep, newOwnerDocument)
   1832                 clone.appendChild(c)
   1833 
   1834     elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
   1835         clone = newOwnerDocument.createDocumentFragment()
   1836         if deep:
   1837             for child in node.childNodes:
   1838                 c = _clone_node(child, deep, newOwnerDocument)
   1839                 clone.appendChild(c)
   1840 
   1841     elif node.nodeType == Node.TEXT_NODE:
   1842         clone = newOwnerDocument.createTextNode(node.data)
   1843     elif node.nodeType == Node.CDATA_SECTION_NODE:
   1844         clone = newOwnerDocument.createCDATASection(node.data)
   1845     elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
   1846         clone = newOwnerDocument.createProcessingInstruction(node.target,
   1847                                                              node.data)
   1848     elif node.nodeType == Node.COMMENT_NODE:
   1849         clone = newOwnerDocument.createComment(node.data)
   1850     elif node.nodeType == Node.ATTRIBUTE_NODE:
   1851         clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
   1852                                                    node.nodeName)
   1853         clone.specified = True
   1854         clone.value = node.value
   1855     elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1856         assert node.ownerDocument is not newOwnerDocument
   1857         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1858         clone = newOwnerDocument.implementation.createDocumentType(
   1859             node.name, node.publicId, node.systemId)
   1860         clone.ownerDocument = newOwnerDocument
   1861         if deep:
   1862             clone.entities._seq = []
   1863             clone.notations._seq = []
   1864             for n in node.notations._seq:
   1865                 notation = Notation(n.nodeName, n.publicId, n.systemId)
   1866                 notation.ownerDocument = newOwnerDocument
   1867                 clone.notations._seq.append(notation)
   1868                 if hasattr(n, '_call_user_data_handler'):
   1869                     n._call_user_data_handler(operation, n, notation)
   1870             for e in node.entities._seq:
   1871                 entity = Entity(e.nodeName, e.publicId, e.systemId,
   1872                                 e.notationName)
   1873                 entity.actualEncoding = e.actualEncoding
   1874                 entity.encoding = e.encoding
   1875                 entity.version = e.version
   1876                 entity.ownerDocument = newOwnerDocument
   1877                 clone.entities._seq.append(entity)
   1878                 if hasattr(e, '_call_user_data_handler'):
   1879                     e._call_user_data_handler(operation, n, entity)
   1880     else:
   1881         # Note the cloning of Document and DocumentType nodes is

   1882         # implementation specific.  minidom handles those cases

   1883         # directly in the cloneNode() methods.

   1884         raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
   1885 
   1886     # Check for _call_user_data_handler() since this could conceivably

   1887     # used with other DOM implementations (one of the FourThought

   1888     # DOMs, perhaps?).

   1889     if hasattr(node, '_call_user_data_handler'):
   1890         node._call_user_data_handler(operation, node, clone)
   1891     return clone
   1892 
   1893 
   1894 def _nssplit(qualifiedName):
   1895     fields = qualifiedName.split(':', 1)
   1896     if len(fields) == 2:
   1897         return fields
   1898     else:
   1899         return (None, fields[0])
   1900 
   1901 
   1902 def _get_StringIO():
   1903     # we can't use cStringIO since it doesn't support Unicode strings

   1904     from StringIO import StringIO
   1905     return StringIO()
   1906 
   1907 def _do_pulldom_parse(func, args, kwargs):
   1908     events = func(*args, **kwargs)
   1909     toktype, rootNode = events.getEvent()
   1910     events.expandNode(rootNode)
   1911     events.clear()
   1912     return rootNode
   1913 
   1914 def parse(file, parser=None, bufsize=None):
   1915     """Parse a file into a DOM by filename or file object."""
   1916     if parser is None and not bufsize:
   1917         from xml.dom import expatbuilder
   1918         return expatbuilder.parse(file)
   1919     else:
   1920         from xml.dom import pulldom
   1921         return _do_pulldom_parse(pulldom.parse, (file,),
   1922             {'parser': parser, 'bufsize': bufsize})
   1923 
   1924 def parseString(string, parser=None):
   1925     """Parse a file into a DOM from a string."""
   1926     if parser is None:
   1927         from xml.dom import expatbuilder
   1928         return expatbuilder.parseString(string)
   1929     else:
   1930         from xml.dom import pulldom
   1931         return _do_pulldom_parse(pulldom.parseString, (string,),
   1932                                  {'parser': parser})
   1933 
   1934 def getDOMImplementation(features=None):
   1935     if features:
   1936         if isinstance(features, StringTypes):
   1937             features = domreg._parse_feature_string(features)
   1938         for f, v in features:
   1939             if not Document.implementation.hasFeature(f, v):
   1940                 return None
   1941     return Document.implementation
   1942