Home | History | Annotate | Download | only in dom
      1 """Simple implementation of the Level 1 DOM.
      2 
      3 Namespaces and other minor Level 2 features are also supported.
      4 
      5 parse("foo.xml")
      6 
      7 parseString("<foo><bar/></foo>")
      8 
      9 Todo:
     10 =====
     11  * convenience methods for getting elements and text.
     12  * more testing
     13  * bring some of the writer and linearizer code into conformance with this
     14         interface
     15  * SAX 2 namespaces
     16 """
     17 
     18 import xml.dom
     19 
     20 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg
     21 from xml.dom.minicompat import *
     22 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS
     23 
# Node types whose insertion or removal makes the child-manipulation
# methods (insertBefore, appendChild, replaceChild, removeChild) call
# _clear_id_cache().  The list isn't actually complete, since the nodes
# being checked will never be the DOCUMENT_NODE or
# DOCUMENT_FRAGMENT_NODE.  (The node being checked is the node being
# added or removed, not the node being modified.)
#
_nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE,
                            xml.dom.Node.ENTITY_REFERENCE_NODE)
     31 
     32 
class Node(xml.dom.Node):
    """Common base class for the DOM nodes implemented in this module.

    Holds the parent/sibling links, the child-list manipulation methods,
    the serialization entry points and the DOM Level 3 "user data"
    helpers.  The class does not define ``childNodes``,
    ``_child_node_types`` or ``writexml`` itself; the methods below read
    them from ``self``, so concrete subclasses are expected to supply
    them.
    """
    namespaceURI = None # this is non-null only for elements and attributes
    parentNode = None
    ownerDocument = None
    nextSibling = None
    previousSibling = None

    prefix = EMPTY_PREFIX # non-null only for NS elements and attributes

    def __nonzero__(self):
        """Treat every node as true, whatever its contents."""
        return True

    def toxml(self, encoding = None):
        """Return the node serialized as XML without added whitespace.

        Delegates to toprettyxml() with empty indent and newline strings.
        """
        return self.toprettyxml("", "", encoding)

    def toprettyxml(self, indent="\t", newl="\n", encoding = None):
        """Return the node serialized as indented XML.

        indent is the string prepended per nesting level and newl the
        string appended after each line.  When encoding is not None the
        output is written through that codec's stream writer.
        """
        # indent = the indentation string to prepend, per level
        # newl = the newline string to append
        # _get_StringIO is not defined in this class; presumably a helper
        # elsewhere in this module that returns an in-memory buffer.
        writer = _get_StringIO()
        if encoding is not None:
            import codecs
            # Can't use codecs.getwriter to preserve 2.0 compatibility
            # (item 3 of the tuple returned by lookup() is the stream
            # writer factory).
            writer = codecs.lookup(encoding)[3](writer)
        if self.nodeType == Node.DOCUMENT_NODE:
            # Can pass encoding only to document, to put it into XML header
            self.writexml(writer, "", indent, newl, encoding)
        else:
            self.writexml(writer, "", indent, newl)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return True if the node has at least one child, else False."""
        if self.childNodes:
            return True
        else:
            return False

    def _get_childNodes(self):
        """Accessor returning the node's child list."""
        return self.childNodes

    def _get_firstChild(self):
        """Accessor for the first child; falls through to None if empty."""
        if self.childNodes:
            return self.childNodes[0]

    def _get_lastChild(self):
        """Accessor for the last child; falls through to None if empty."""
        if self.childNodes:
            return self.childNodes[-1]

    def insertBefore(self, newChild, refChild):
        """Insert newChild immediately before refChild and return newChild.

        A document fragment has each of its children inserted in turn and
        the fragment itself is returned.  If refChild is None the node is
        appended instead.  Raises HierarchyRequestErr when newChild's type
        is not listed in self._child_node_types, and NotFoundErr when
        refChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Iterate over a snapshot: each insertion detaches the child
            # from its current parent, which mutates that child list.
            for c in tuple(newChild.childNodes):
                self.insertBefore(c, refChild)
            ### The DOM does not clearly specify what to return in this case
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        # Detach from any previous parent before looking up the index.
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            try:
                index = self.childNodes.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            if newChild.nodeType in _nodeTypes_with_children:
                _clear_id_cache(self)
            self.childNodes.insert(index, newChild)
            # Splice newChild into the sibling chain around refChild.
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if index:
                node = self.childNodes[index-1]
                node.nextSibling = newChild
                newChild.previousSibling = node
            else:
                newChild.previousSibling = None
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append node as the last child and return it.

        A document fragment has each of its children appended in turn and
        the fragment itself is returned.  Raises HierarchyRequestErr when
        the node's type is not listed in self._child_node_types.
        """
        if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Snapshot the children; appending detaches them one by one.
            for c in tuple(node.childNodes):
                self.appendChild(c)
            ### The DOM does not clearly specify what to return in this case
            return node
        if node.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(node), repr(self)))
        elif node.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)
        if node.parentNode is not None:
            node.parentNode.removeChild(node)
        _append_child(self, node)
        # The appended node is now the last child, so it has no successor.
        node.nextSibling = None
        return node

    def replaceChild(self, newChild, oldChild):
        """Replace oldChild with newChild and return oldChild.

        When newChild is a document fragment, oldChild is removed and the
        fragment is inserted at its position; the value returned is then
        whatever insertBefore() returns.  When newChild is oldChild the
        method returns None without changing anything.  Raises
        HierarchyRequestErr for a disallowed node type and NotFoundErr
        when oldChild is not a child of this node.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Remember the insertion point before oldChild's links are
            # cleared by removeChild().
            refChild = oldChild.nextSibling
            self.removeChild(oldChild)
            return self.insertBefore(newChild, refChild)
        if newChild.nodeType not in self._child_node_types:
            raise xml.dom.HierarchyRequestErr(
                "%s cannot be child of %s" % (repr(newChild), repr(self)))
        if newChild is oldChild:
            return
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        try:
            index = self.childNodes.index(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        self.childNodes[index] = newChild
        newChild.parentNode = self
        oldChild.parentNode = None
        if (newChild.nodeType in _nodeTypes_with_children
            or oldChild.nodeType in _nodeTypes_with_children):
            _clear_id_cache(self)
        # Hand oldChild's sibling links over to newChild, then repair the
        # neighbours' links so they point at newChild.
        newChild.nextSibling = oldChild.nextSibling
        newChild.previousSibling = oldChild.previousSibling
        oldChild.nextSibling = None
        oldChild.previousSibling = None
        if newChild.previousSibling:
            newChild.previousSibling.nextSibling = newChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        return oldChild

    def removeChild(self, oldChild):
        """Detach oldChild from this node and return it.

        Raises NotFoundErr when oldChild is not in self.childNodes.
        """
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        # Bridge the gap in the sibling chain left by oldChild.
        if oldChild.nextSibling is not None:
            oldChild.nextSibling.previousSibling = oldChild.previousSibling
        if oldChild.previousSibling is not None:
            oldChild.previousSibling.nextSibling = oldChild.nextSibling
        oldChild.nextSibling = oldChild.previousSibling = None
        if oldChild.nodeType in _nodeTypes_with_children:
            _clear_id_cache(self)

        oldChild.parentNode = None
        return oldChild

    def normalize(self):
        """Merge adjacent text children and drop empty ones.

        Element children are normalized recursively.  Discarded text
        nodes are unlinked, and the child list is rewritten in place.
        """
        # L collects the children that survive normalization, in order.
        L = []
        for child in self.childNodes:
            if child.nodeType == Node.TEXT_NODE:
                if not child.data:
                    # empty text node; discard
                    if L:
                        L[-1].nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = child.previousSibling
                    child.unlink()
                elif L and L[-1].nodeType == child.nodeType:
                    # collapse text node
                    node = L[-1]
                    node.data = node.data + child.data
                    node.nextSibling = child.nextSibling
                    if child.nextSibling:
                        child.nextSibling.previousSibling = node
                    child.unlink()
                else:
                    L.append(child)
            else:
                L.append(child)
                if child.nodeType == Node.ELEMENT_NODE:
                    child.normalize()
        # Slice assignment keeps the same list object as self.childNodes.
        self.childNodes[:] = L

    def cloneNode(self, deep):
        """Return a copy of this node produced by _clone_node().

        The third argument passed on is this node's ownerDocument, or the
        node itself when it has none.
        """
        return _clone_node(self, deep, self.ownerDocument or self)

    def isSupported(self, feature, version):
        """Ask the owner document's implementation about a feature.

        Requires self.ownerDocument to be set; there is no fallback for a
        node without an owner document.
        """
        return self.ownerDocument.implementation.hasFeature(feature, version)

    def _get_localName(self):
        """Accessor for localName; always None at this level."""
        # Overridden in Element and Attr where localName can be Non-Null
        return None

    # Node interfaces from Level 3 (WD 9 April 2002)

    def isSameNode(self, other):
        """Return True when other is this very object."""
        return self is other

    def getInterface(self, feature):
        """Return self if the feature is supported (any version), else None."""
        if self.isSupported(feature, None):
            return self
        else:
            return None

    # The "user data" functions use a dictionary that is only present
    # if some user data has been set, so be careful not to assume it
    # exists.

    def getUserData(self, key):
        """Return the data stored under key, or None if nothing is stored."""
        try:
            return self._user_data[key][0]
        except (AttributeError, KeyError):
            return None

    def setUserData(self, key, data, handler):
        """Store (data, handler) under key and return the previous data.

        Passing data=None removes an existing entry; the handler argument
        is ignored in that case.  Returns None when no data was stored
        under key before.
        """
        old = None
        try:
            d = self._user_data
        except AttributeError:
            # First use on this node: create the dictionary lazily.
            d = {}
            self._user_data = d
        if key in d:
            old = d[key][0]
        if data is None:
            # ignore handlers passed for None
            handler = None
            if old is not None:
                del d[key]
        else:
            d[key] = (data, handler)
        return old

    def _call_user_data_handler(self, operation, src, dst):
        """Invoke handle() on every stored handler that is not None.

        Each handler is called with the operation, the key, the stored
        data, src and dst.  Does nothing when no user data was ever set.
        """
        if hasattr(self, "_user_data"):
            for key, (data, handler) in self._user_data.items():
                if handler is not None:
                    handler.handle(operation, key, data, src, dst)

    # minidom-specific API:

    def unlink(self):
        """Clear this node's links and recursively unlink its children.

        Parent, owner document and sibling references are set to None;
        a non-empty child list is replaced by a fresh NodeList after each
        child has been unlinked.
        """
        self.parentNode = self.ownerDocument = None
        if self.childNodes:
            for child in self.childNodes:
                child.unlink()
            self.childNodes = NodeList()
        self.previousSibling = None
        self.nextSibling = None

# Register the _get_* accessors above as attributes of Node.  defproperty
# is not defined in this file; presumably it comes from the
# xml.dom.minicompat star import.
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild",  doc="Last child node, or None.")
defproperty(Node, "localName",  doc="Namespace-local name of this node.")
    273 
    274 
    275 def _append_child(self, node):
    276     # fast path with less checks; usable by DOM builders if careful
    277     childNodes = self.childNodes
    278     if childNodes:
    279         last = childNodes[-1]
    280         node.__dict__["previousSibling"] = last
    281         last.__dict__["nextSibling"] = node
    282     childNodes.append(node)
    283     node.__dict__["parentNode"] = self
    284 
    285 def _in_document(node):
    286     # return True iff node is part of a document tree
    287     while node is not None:
    288         if node.nodeType == Node.DOCUMENT_NODE:
    289             return True
    290         node = node.parentNode
    291     return False
    292 
    293 def _write_data(writer, data):
    294     "Writes datachars to writer."
    295     if data:
    296         data = data.replace("&", "&amp;").replace("<", "&lt;"). \
    297                     replace("\"", "&quot;").replace(">", "&gt;")
    298         writer.write(data)
    299 
    300 def _get_elements_by_tagName_helper(parent, name, rc):
    301     for node in parent.childNodes:
    302         if node.nodeType == Node.ELEMENT_NODE and \
    303             (name == "*" or node.tagName == name):
    304             rc.append(node)
    305         _get_elements_by_tagName_helper(node, name, rc)
    306     return rc
    307 
    308 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc):
    309     for node in parent.childNodes:
    310         if node.nodeType == Node.ELEMENT_NODE:
    311             if ((localName == "*" or node.localName == localName) and
    312                 (nsURI == "*" or node.namespaceURI == nsURI)):
    313                 rc.append(node)
    314             _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc)
    315     return rc
    316 
    317 class DocumentFragment(Node):
    318     nodeType = Node.DOCUMENT_FRAGMENT_NODE
    319     nodeName = "#document-fragment"
    320     nodeValue = None
    321     attributes = None
    322     parentNode = None
    323     _child_node_types = (Node.ELEMENT_NODE,
    324                          Node.TEXT_NODE,
    325                          Node.CDATA_SECTION_NODE,
    326                          Node.ENTITY_REFERENCE_NODE,
    327                          Node.PROCESSING_INSTRUCTION_NODE,
    328                          Node.COMMENT_NODE,
    329                          Node.NOTATION_NODE)
    330 
    331     def __init__(self):
    332         self.childNodes = NodeList()
    333 
    334 
    335 class Attr(Node):
    336     nodeType = Node.ATTRIBUTE_NODE
    337     attributes = None
    338     ownerElement = None
    339     specified = False
    340     _is_id = False
    341 
    342     _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE)
    343 
    344     def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None,
    345                  prefix=None):
    346         # skip setattr for performance
    347         d = self.__dict__
    348         d["nodeName"] = d["name"] = qName
    349         d["namespaceURI"] = namespaceURI
    350         d["prefix"] = prefix
    351         d['childNodes'] = NodeList()
    352 
    353         # Add the single child node that represents the value of the attr
    354         self.childNodes.append(Text())
    355 
    356         # nodeValue and value are set elsewhere
    357 
    358     def _get_localName(self):
    359         return self.nodeName.split(":", 1)[-1]
    360 
    361     def _get_name(self):
    362         return self.name
    363 
    364     def _get_specified(self):
    365         return self.specified
    366 
    367     def __setattr__(self, name, value):
    368         d = self.__dict__
    369         if name in ("value", "nodeValue"):
    370             d["value"] = d["nodeValue"] = value
    371             d2 = self.childNodes[0].__dict__
    372             d2["data"] = d2["nodeValue"] = value
    373             if self.ownerElement is not None:
    374                 _clear_id_cache(self.ownerElement)
    375         elif name in ("name", "nodeName"):
    376             d["name"] = d["nodeName"] = value
    377             if self.ownerElement is not None:
    378                 _clear_id_cache(self.ownerElement)
    379         else:
    380             d[name] = value
    381 
    382     def _set_prefix(self, prefix):
    383         nsuri = self.namespaceURI
    384         if prefix == "xmlns":
    385             if nsuri and nsuri != XMLNS_NAMESPACE:
    386                 raise xml.dom.NamespaceErr(
    387                     "illegal use of 'xmlns' prefix for the wrong namespace")
    388         d = self.__dict__
    389         d['prefix'] = prefix
    390         if prefix is None:
    391             newName = self.localName
    392         else:
    393             newName = "%s:%s" % (prefix, self.localName)
    394         if self.ownerElement:
    395             _clear_id_cache(self.ownerElement)
    396         d['nodeName'] = d['name'] = newName
    397 
    398     def _set_value(self, value):
    399         d = self.__dict__
    400         d['value'] = d['nodeValue'] = value
    401         if self.ownerElement:
    402             _clear_id_cache(self.ownerElement)
    403         self.childNodes[0].data = value
    404 
    405     def unlink(self):
    406         # This implementation does not call the base implementation
    407         # since most of that is not needed, and the expense of the
    408         # method call is not warranted.  We duplicate the removal of
    409         # children, but that's all we needed from the base class.
    410         elem = self.ownerElement
    411         if elem is not None:
    412             del elem._attrs[self.nodeName]
    413             del elem._attrsNS[(self.namespaceURI, self.localName)]
    414             if self._is_id:
    415                 self._is_id = False
    416                 elem._magic_id_nodes -= 1
    417                 self.ownerDocument._magic_id_count -= 1
    418         for child in self.childNodes:
    419             child.unlink()
    420         del self.childNodes[:]
    421 
    422     def _get_isId(self):
    423         if self._is_id:
    424             return True
    425         doc = self.ownerDocument
    426         elem = self.ownerElement
    427         if doc is None or elem is None:
    428             return False
    429 
    430         info = doc._get_elem_info(elem)
    431         if info is None:
    432             return False
    433         if self.namespaceURI:
    434             return info.isIdNS(self.namespaceURI, self.localName)
    435         else:
    436             return info.isId(self.nodeName)
    437 
    438     def _get_schemaType(self):
    439         doc = self.ownerDocument
    440         elem = self.ownerElement
    441         if doc is None or elem is None:
    442             return _no_type
    443 
    444         info = doc._get_elem_info(elem)
    445         if info is None:
    446             return _no_type
    447         if self.namespaceURI:
    448             return info.getAttributeTypeNS(self.namespaceURI, self.localName)
    449         else:
    450             return info.getAttributeType(self.nodeName)
    451 
    452 defproperty(Attr, "isId",       doc="True if this attribute is an ID.")
    453 defproperty(Attr, "localName",  doc="Namespace-local name of this attribute.")
    454 defproperty(Attr, "schemaType", doc="Schema type for this attribute.")
    455 
    456 
    457 class NamedNodeMap(object):
    458     """The attribute list is a transient interface to the underlying
    459     dictionaries.  Mutations here will change the underlying element's
    460     dictionary.
    461 
    462     Ordering is imposed artificially and does not reflect the order of
    463     attributes as found in an input document.
    464     """
    465 
    466     __slots__ = ('_attrs', '_attrsNS', '_ownerElement')
    467 
    468     def __init__(self, attrs, attrsNS, ownerElement):
    469         self._attrs = attrs
    470         self._attrsNS = attrsNS
    471         self._ownerElement = ownerElement
    472 
    473     def _get_length(self):
    474         return len(self._attrs)
    475 
    476     def item(self, index):
    477         try:
    478             return self[self._attrs.keys()[index]]
    479         except IndexError:
    480             return None
    481 
    482     def items(self):
    483         L = []
    484         for node in self._attrs.values():
    485             L.append((node.nodeName, node.value))
    486         return L
    487 
    488     def itemsNS(self):
    489         L = []
    490         for node in self._attrs.values():
    491             L.append(((node.namespaceURI, node.localName), node.value))
    492         return L
    493 
    494     def has_key(self, key):
    495         if isinstance(key, StringTypes):
    496             return key in self._attrs
    497         else:
    498             return key in self._attrsNS
    499 
    500     def keys(self):
    501         return self._attrs.keys()
    502 
    503     def keysNS(self):
    504         return self._attrsNS.keys()
    505 
    506     def values(self):
    507         return self._attrs.values()
    508 
    509     def get(self, name, value=None):
    510         return self._attrs.get(name, value)
    511 
    512     __len__ = _get_length
    513 
    514     __hash__ = None # Mutable type can't be correctly hashed
    515     def __cmp__(self, other):
    516         if self._attrs is getattr(other, "_attrs", None):
    517             return 0
    518         else:
    519             return cmp(id(self), id(other))
    520 
    521     def __getitem__(self, attname_or_tuple):
    522         if isinstance(attname_or_tuple, tuple):
    523             return self._attrsNS[attname_or_tuple]
    524         else:
    525             return self._attrs[attname_or_tuple]
    526 
    527     # same as set
    528     def __setitem__(self, attname, value):
    529         if isinstance(value, StringTypes):
    530             try:
    531                 node = self._attrs[attname]
    532             except KeyError:
    533                 node = Attr(attname)
    534                 node.ownerDocument = self._ownerElement.ownerDocument
    535                 self.setNamedItem(node)
    536             node.value = value
    537         else:
    538             if not isinstance(value, Attr):
    539                 raise TypeError, "value must be a string or Attr object"
    540             node = value
    541             self.setNamedItem(node)
    542 
    543     def getNamedItem(self, name):
    544         try:
    545             return self._attrs[name]
    546         except KeyError:
    547             return None
    548 
    549     def getNamedItemNS(self, namespaceURI, localName):
    550         try:
    551             return self._attrsNS[(namespaceURI, localName)]
    552         except KeyError:
    553             return None
    554 
    555     def removeNamedItem(self, name):
    556         n = self.getNamedItem(name)
    557         if n is not None:
    558             _clear_id_cache(self._ownerElement)
    559             del self._attrs[n.nodeName]
    560             del self._attrsNS[(n.namespaceURI, n.localName)]
    561             if 'ownerElement' in n.__dict__:
    562                 n.__dict__['ownerElement'] = None
    563             return n
    564         else:
    565             raise xml.dom.NotFoundErr()
    566 
    567     def removeNamedItemNS(self, namespaceURI, localName):
    568         n = self.getNamedItemNS(namespaceURI, localName)
    569         if n is not None:
    570             _clear_id_cache(self._ownerElement)
    571             del self._attrsNS[(n.namespaceURI, n.localName)]
    572             del self._attrs[n.nodeName]
    573             if 'ownerElement' in n.__dict__:
    574                 n.__dict__['ownerElement'] = None
    575             return n
    576         else:
    577             raise xml.dom.NotFoundErr()
    578 
    579     def setNamedItem(self, node):
    580         if not isinstance(node, Attr):
    581             raise xml.dom.HierarchyRequestErr(
    582                 "%s cannot be child of %s" % (repr(node), repr(self)))
    583         old = self._attrs.get(node.name)
    584         if old:
    585             old.unlink()
    586         self._attrs[node.name] = node
    587         self._attrsNS[(node.namespaceURI, node.localName)] = node
    588         node.ownerElement = self._ownerElement
    589         _clear_id_cache(node.ownerElement)
    590         return old
    591 
    592     def setNamedItemNS(self, node):
    593         return self.setNamedItem(node)
    594 
    595     def __delitem__(self, attname_or_tuple):
    596         node = self[attname_or_tuple]
    597         _clear_id_cache(node.ownerElement)
    598         node.unlink()
    599 
    600     def __getstate__(self):
    601         return self._attrs, self._attrsNS, self._ownerElement
    602 
    603     def __setstate__(self, state):
    604         self._attrs, self._attrsNS, self._ownerElement = state
    605 
    606 defproperty(NamedNodeMap, "length",
    607             doc="Number of nodes in the NamedNodeMap.")
    608 
    609 AttributeList = NamedNodeMap
    610 
    611 
    612 class TypeInfo(object):
    613     __slots__ = 'namespace', 'name'
    614 
    615     def __init__(self, namespace, name):
    616         self.namespace = namespace
    617         self.name = name
    618 
    619     def __repr__(self):
    620         if self.namespace:
    621             return "<TypeInfo %r (from %r)>" % (self.name, self.namespace)
    622         else:
    623             return "<TypeInfo %r>" % self.name
    624 
    625     def _get_name(self):
    626         return self.name
    627 
    628     def _get_namespace(self):
    629         return self.namespace
    630 
    631 _no_type = TypeInfo(None, None)
    632 
    633 class Element(Node):
    634     nodeType = Node.ELEMENT_NODE
    635     nodeValue = None
    636     schemaType = _no_type
    637 
    638     _magic_id_nodes = 0
    639 
    640     _child_node_types = (Node.ELEMENT_NODE,
    641                          Node.PROCESSING_INSTRUCTION_NODE,
    642                          Node.COMMENT_NODE,
    643                          Node.TEXT_NODE,
    644                          Node.CDATA_SECTION_NODE,
    645                          Node.ENTITY_REFERENCE_NODE)
    646 
    647     def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None,
    648                  localName=None):
    649         self.tagName = self.nodeName = tagName
    650         self.prefix = prefix
    651         self.namespaceURI = namespaceURI
    652         self.childNodes = NodeList()
    653 
    654         self._attrs = {}   # attributes are double-indexed:
    655         self._attrsNS = {} #    tagName -> Attribute
    656                            #    URI,localName -> Attribute
    657                            # in the future: consider lazy generation
    658                            # of attribute objects this is too tricky
    659                            # for now because of headaches with
    660                            # namespaces.
    661 
    662     def _get_localName(self):
    663         return self.tagName.split(":", 1)[-1]
    664 
    665     def _get_tagName(self):
    666         return self.tagName
    667 
    668     def unlink(self):
    669         for attr in self._attrs.values():
    670             attr.unlink()
    671         self._attrs = None
    672         self._attrsNS = None
    673         Node.unlink(self)
    674 
    675     def getAttribute(self, attname):
    676         try:
    677             return self._attrs[attname].value
    678         except KeyError:
    679             return ""
    680 
    681     def getAttributeNS(self, namespaceURI, localName):
    682         try:
    683             return self._attrsNS[(namespaceURI, localName)].value
    684         except KeyError:
    685             return ""
    686 
    687     def setAttribute(self, attname, value):
    688         attr = self.getAttributeNode(attname)
    689         if attr is None:
    690             attr = Attr(attname)
    691             # for performance
    692             d = attr.__dict__
    693             d["value"] = d["nodeValue"] = value
    694             d["ownerDocument"] = self.ownerDocument
    695             self.setAttributeNode(attr)
    696         elif value != attr.value:
    697             d = attr.__dict__
    698             d["value"] = d["nodeValue"] = value
    699             if attr.isId:
    700                 _clear_id_cache(self)
    701 
    702     def setAttributeNS(self, namespaceURI, qualifiedName, value):
    703         prefix, localname = _nssplit(qualifiedName)
    704         attr = self.getAttributeNodeNS(namespaceURI, localname)
    705         if attr is None:
    706             # for performance
    707             attr = Attr(qualifiedName, namespaceURI, localname, prefix)
    708             d = attr.__dict__
    709             d["prefix"] = prefix
    710             d["nodeName"] = qualifiedName
    711             d["value"] = d["nodeValue"] = value
    712             d["ownerDocument"] = self.ownerDocument
    713             self.setAttributeNode(attr)
    714         else:
    715             d = attr.__dict__
    716             if value != attr.value:
    717                 d["value"] = d["nodeValue"] = value
    718                 if attr.isId:
    719                     _clear_id_cache(self)
    720             if attr.prefix != prefix:
    721                 d["prefix"] = prefix
    722                 d["nodeName"] = qualifiedName
    723 
    724     def getAttributeNode(self, attrname):
    725         return self._attrs.get(attrname)
    726 
    727     def getAttributeNodeNS(self, namespaceURI, localName):
    728         return self._attrsNS.get((namespaceURI, localName))
    729 
    730     def setAttributeNode(self, attr):
    731         if attr.ownerElement not in (None, self):
    732             raise xml.dom.InuseAttributeErr("attribute node already owned")
    733         old1 = self._attrs.get(attr.name, None)
    734         if old1 is not None:
    735             self.removeAttributeNode(old1)
    736         old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None)
    737         if old2 is not None and old2 is not old1:
    738             self.removeAttributeNode(old2)
    739         _set_attribute_node(self, attr)
    740 
    741         if old1 is not attr:
    742             # It might have already been part of this node, in which case
    743             # it doesn't represent a change, and should not be returned.
    744             return old1
    745         if old2 is not attr:
    746             return old2
    747 
    748     setAttributeNodeNS = setAttributeNode
    749 
    750     def removeAttribute(self, name):
    751         try:
    752             attr = self._attrs[name]
    753         except KeyError:
    754             raise xml.dom.NotFoundErr()
    755         self.removeAttributeNode(attr)
    756 
    757     def removeAttributeNS(self, namespaceURI, localName):
    758         try:
    759             attr = self._attrsNS[(namespaceURI, localName)]
    760         except KeyError:
    761             raise xml.dom.NotFoundErr()
    762         self.removeAttributeNode(attr)
    763 
    764     def removeAttributeNode(self, node):
    765         if node is None:
    766             raise xml.dom.NotFoundErr()
    767         try:
    768             self._attrs[node.name]
    769         except KeyError:
    770             raise xml.dom.NotFoundErr()
    771         _clear_id_cache(self)
    772         node.unlink()
    773         # Restore this since the node is still useful and otherwise
    774         # unlinked
    775         node.ownerDocument = self.ownerDocument
    776 
    777     removeAttributeNodeNS = removeAttributeNode
    778 
    779     def hasAttribute(self, name):
    780         return name in self._attrs
    781 
    782     def hasAttributeNS(self, namespaceURI, localName):
    783         return (namespaceURI, localName) in self._attrsNS
    784 
    def getElementsByTagName(self, name):
        """Hand a fresh NodeList to _get_elements_by_tagName_helper for
        this element and *name*, and return the helper's result."""
        matches = NodeList()
        return _get_elements_by_tagName_helper(self, name, matches)
    787 
    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Hand a fresh NodeList to _get_elements_by_tagName_ns_helper
        for this element and the given namespace/local name, and return
        the helper's result."""
        matches = NodeList()
        return _get_elements_by_tagName_ns_helper(
            self, namespaceURI, localName, matches)
    791 
    def __repr__(self):
        """Show the tag name and the id() of this element in hex."""
        details = (self.tagName, id(self))
        return "<DOM Element: %s at %#x>" % details
    794 
    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element, its attributes and its children to *writer*.

        writer    -- object providing a write() method
        indent    -- current indentation
        addindent -- indentation to add to higher levels
        newl      -- newline string

        Attributes are emitted in sorted name order.  An element whose
        only child is a text node is written on one line with no added
        indentation; an element without children is written as an
        empty-element tag.
        """
        writer.write(indent+"<" + self.tagName)

        attrs = self._get_attributes()
        # sorted() works on whatever iterable keys() returns, so this
        # does not depend on keys() producing a list that can be sorted
        # in place.
        for a_name in sorted(attrs.keys()):
            writer.write(" %s=\"" % a_name)
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            if (len(self.childNodes) == 1 and
                self.childNodes[0].nodeType == Node.TEXT_NODE):
                # A lone text child is kept inline so that no
                # whitespace is added around its data.
                self.childNodes[0].writexml(writer, '', '', '')
            else:
                writer.write(newl)
                for node in self.childNodes:
                    node.writexml(writer, indent+addindent, addindent, newl)
                writer.write(indent)
            writer.write("</%s>%s" % (self.tagName, newl))
        else:
            writer.write("/>%s"%(newl))
    822 
    823     def _get_attributes(self):
    824         return NamedNodeMap(self._attrs, self._attrsNS, self)
    825 
    826     def hasAttributes(self):
    827         if self._attrs:
    828             return True
    829         else:
    830             return False
    831 
    832     # DOM Level 3 attributes, based on the 22 Oct 2002 draft
    833 
    def setIdAttribute(self, name):
        """Look up the attribute node named *name* and pass it to
        setIdAttributeNode()."""
        self.setIdAttributeNode(self.getAttributeNode(name))
    837 
    def setIdAttributeNS(self, namespaceURI, localName):
        """Look up the attribute node for (namespaceURI, localName) and
        pass it to setIdAttributeNode()."""
        self.setIdAttributeNode(
            self.getAttributeNodeNS(namespaceURI, localName))
    841 
    def setIdAttributeNode(self, idAttr):
        """Flag the attribute node *idAttr* as an ID attribute.

        Raises xml.dom.NotFoundErr when *idAttr* is None or is not
        owned by this element, and xml.dom.NoModificationAllowedErr
        when _get_containing_entref() finds an enclosing entity
        reference.  Flagging a node that is already an ID changes
        nothing.
        """
        if idAttr is None or not self.isSameNode(idAttr.ownerElement):
            raise xml.dom.NotFoundErr()
        if _get_containing_entref(self) is not None:
            raise xml.dom.NoModificationAllowedErr()
        if idAttr._is_id:
            return
        idAttr.__dict__['_is_id'] = True
        # Keep the per-element and per-document ID counters in step,
        # then drop any cached ID lookups.
        self._magic_id_nodes += 1
        self.ownerDocument._magic_id_count += 1
        _clear_id_cache(self)
    852 
# Register the "attributes" and "localName" properties on Element.
# NOTE(review): defproperty is not defined in this part of the file
# (presumably it comes from xml.dom.minicompat and wires each property
# to the matching _get_<name> accessor) -- confirm there.
defproperty(Element, "attributes",
            doc="NamedNodeMap of attributes on the element.")
defproperty(Element, "localName",
            doc="Namespace-local name of this element.")
    857 
    858 
    859 def _set_attribute_node(element, attr):
    860     _clear_id_cache(element)
    861     element._attrs[attr.name] = attr
    862     element._attrsNS[(attr.namespaceURI, attr.localName)] = attr
    863 
    864     # This creates a circular reference, but Element.unlink()
    865     # breaks the cycle since the references to the attribute
    866     # dictionaries are tossed.
    867     attr.__dict__['ownerElement'] = element
    868 
    869 
    870 class Childless:
    871     """Mixin that makes childless-ness easy to implement and avoids
    872     the complexity of the Node methods that deal with children.
    873     """
    874 
    875     attributes = None
    876     childNodes = EmptyNodeList()
    877     firstChild = None
    878     lastChild = None
    879 
    880     def _get_firstChild(self):
    881         return None
    882 
    883     def _get_lastChild(self):
    884         return None
    885 
    886     def appendChild(self, node):
    887         raise xml.dom.HierarchyRequestErr(
    888             self.nodeName + " nodes cannot have children")
    889 
    890     def hasChildNodes(self):
    891         return False
    892 
    893     def insertBefore(self, newChild, refChild):
    894         raise xml.dom.HierarchyRequestErr(
    895             self.nodeName + " nodes do not have children")
    896 
    897     def removeChild(self, oldChild):
    898         raise xml.dom.NotFoundErr(
    899             self.nodeName + " nodes do not have children")
    900 
    901     def normalize(self):
    902         # For childless nodes, normalize() has nothing to do.
    903         pass
    904 
    905     def replaceChild(self, newChild, oldChild):
    906         raise xml.dom.HierarchyRequestErr(
    907             self.nodeName + " nodes do not have children")
    908 
    909 
    910 class ProcessingInstruction(Childless, Node):
    911     nodeType = Node.PROCESSING_INSTRUCTION_NODE
    912 
    913     def __init__(self, target, data):
    914         self.target = self.nodeName = target
    915         self.data = self.nodeValue = data
    916 
    917     def _get_data(self):
    918         return self.data
    919     def _set_data(self, value):
    920         d = self.__dict__
    921         d['data'] = d['nodeValue'] = value
    922 
    923     def _get_target(self):
    924         return self.target
    925     def _set_target(self, value):
    926         d = self.__dict__
    927         d['target'] = d['nodeName'] = value
    928 
    929     def __setattr__(self, name, value):
    930         if name == "data" or name == "nodeValue":
    931             self.__dict__['data'] = self.__dict__['nodeValue'] = value
    932         elif name == "target" or name == "nodeName":
    933             self.__dict__['target'] = self.__dict__['nodeName'] = value
    934         else:
    935             self.__dict__[name] = value
    936 
    937     def writexml(self, writer, indent="", addindent="", newl=""):
    938         writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl))
    939 
    940 
    941 class CharacterData(Childless, Node):
    942     def _get_length(self):
    943         return len(self.data)
    944     __len__ = _get_length
    945 
    946     def _get_data(self):
    947         return self.__dict__['data']
    948     def _set_data(self, data):
    949         d = self.__dict__
    950         d['data'] = d['nodeValue'] = data
    951 
    952     _get_nodeValue = _get_data
    953     _set_nodeValue = _set_data
    954 
    955     def __setattr__(self, name, value):
    956         if name == "data" or name == "nodeValue":
    957             self.__dict__['data'] = self.__dict__['nodeValue'] = value
    958         else:
    959             self.__dict__[name] = value
    960 
    961     def __repr__(self):
    962         data = self.data
    963         if len(data) > 10:
    964             dotdotdot = "..."
    965         else:
    966             dotdotdot = ""
    967         return '<DOM %s node "%r%s">' % (
    968             self.__class__.__name__, data[0:10], dotdotdot)
    969 
    970     def substringData(self, offset, count):
    971         if offset < 0:
    972             raise xml.dom.IndexSizeErr("offset cannot be negative")
    973         if offset >= len(self.data):
    974             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    975         if count < 0:
    976             raise xml.dom.IndexSizeErr("count cannot be negative")
    977         return self.data[offset:offset+count]
    978 
    979     def appendData(self, arg):
    980         self.data = self.data + arg
    981 
    982     def insertData(self, offset, arg):
    983         if offset < 0:
    984             raise xml.dom.IndexSizeErr("offset cannot be negative")
    985         if offset >= len(self.data):
    986             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    987         if arg:
    988             self.data = "%s%s%s" % (
    989                 self.data[:offset], arg, self.data[offset:])
    990 
    991     def deleteData(self, offset, count):
    992         if offset < 0:
    993             raise xml.dom.IndexSizeErr("offset cannot be negative")
    994         if offset >= len(self.data):
    995             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
    996         if count < 0:
    997             raise xml.dom.IndexSizeErr("count cannot be negative")
    998         if count:
    999             self.data = self.data[:offset] + self.data[offset+count:]
   1000 
   1001     def replaceData(self, offset, count, arg):
   1002         if offset < 0:
   1003             raise xml.dom.IndexSizeErr("offset cannot be negative")
   1004         if offset >= len(self.data):
   1005             raise xml.dom.IndexSizeErr("offset cannot be beyond end of data")
   1006         if count < 0:
   1007             raise xml.dom.IndexSizeErr("count cannot be negative")
   1008         if count:
   1009             self.data = "%s%s%s" % (
   1010                 self.data[:offset], arg, self.data[offset+count:])
   1011 
# Register the "length" property on CharacterData.
# NOTE(review): presumably served by CharacterData._get_length via
# defproperty (not defined in this part of the file) -- confirm.
defproperty(CharacterData, "length", doc="Length of the string data.")
   1013 
   1014 
   1015 class Text(CharacterData):
   1016     # Make sure we don't add an instance __dict__ if we don't already
   1017     # have one, at least when that's possible:
   1018     # XXX this does not work, CharacterData is an old-style class
   1019     # __slots__ = ()
   1020 
   1021     nodeType = Node.TEXT_NODE
   1022     nodeName = "#text"
   1023     attributes = None
   1024 
   1025     def splitText(self, offset):
   1026         if offset < 0 or offset > len(self.data):
   1027             raise xml.dom.IndexSizeErr("illegal offset value")
   1028         newText = self.__class__()
   1029         newText.data = self.data[offset:]
   1030         newText.ownerDocument = self.ownerDocument
   1031         next = self.nextSibling
   1032         if self.parentNode and self in self.parentNode.childNodes:
   1033             if next is None:
   1034                 self.parentNode.appendChild(newText)
   1035             else:
   1036                 self.parentNode.insertBefore(newText, next)
   1037         self.data = self.data[:offset]
   1038         return newText
   1039 
   1040     def writexml(self, writer, indent="", addindent="", newl=""):
   1041         _write_data(writer, "%s%s%s" % (indent, self.data, newl))
   1042 
   1043     # DOM Level 3 (WD 9 April 2002)
   1044 
   1045     def _get_wholeText(self):
   1046         L = [self.data]
   1047         n = self.previousSibling
   1048         while n is not None:
   1049             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1050                 L.insert(0, n.data)
   1051                 n = n.previousSibling
   1052             else:
   1053                 break
   1054         n = self.nextSibling
   1055         while n is not None:
   1056             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1057                 L.append(n.data)
   1058                 n = n.nextSibling
   1059             else:
   1060                 break
   1061         return ''.join(L)
   1062 
   1063     def replaceWholeText(self, content):
   1064         # XXX This needs to be seriously changed if minidom ever
   1065         # supports EntityReference nodes.
   1066         parent = self.parentNode
   1067         n = self.previousSibling
   1068         while n is not None:
   1069             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1070                 next = n.previousSibling
   1071                 parent.removeChild(n)
   1072                 n = next
   1073             else:
   1074                 break
   1075         n = self.nextSibling
   1076         if not content:
   1077             parent.removeChild(self)
   1078         while n is not None:
   1079             if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
   1080                 next = n.nextSibling
   1081                 parent.removeChild(n)
   1082                 n = next
   1083             else:
   1084                 break
   1085         if content:
   1086             d = self.__dict__
   1087             d['data'] = content
   1088             d['nodeValue'] = content
   1089             return self
   1090         else:
   1091             return None
   1092 
   1093     def _get_isWhitespaceInElementContent(self):
   1094         if self.data.strip():
   1095             return False
   1096         elem = _get_containing_element(self)
   1097         if elem is None:
   1098             return False
   1099         info = self.ownerDocument._get_elem_info(elem)
   1100         if info is None:
   1101             return False
   1102         else:
   1103             return info.isElementContent()
   1104 
# Register the "isWhitespaceInElementContent" and "wholeText"
# properties on Text.
# NOTE(review): presumably served by the matching Text._get_<name>
# methods via defproperty (not defined in this part of the file) --
# confirm.
defproperty(Text, "isWhitespaceInElementContent",
            doc="True iff this text node contains only whitespace"
                " and is in element content.")
defproperty(Text, "wholeText",
            doc="The text of all logically-adjacent text nodes.")
   1110 
   1111 
   1112 def _get_containing_element(node):
   1113     c = node.parentNode
   1114     while c is not None:
   1115         if c.nodeType == Node.ELEMENT_NODE:
   1116             return c
   1117         c = c.parentNode
   1118     return None
   1119 
   1120 def _get_containing_entref(node):
   1121     c = node.parentNode
   1122     while c is not None:
   1123         if c.nodeType == Node.ENTITY_REFERENCE_NODE:
   1124             return c
   1125         c = c.parentNode
   1126     return None
   1127 
   1128 
   1129 class Comment(Childless, CharacterData):
   1130     nodeType = Node.COMMENT_NODE
   1131     nodeName = "#comment"
   1132 
   1133     def __init__(self, data):
   1134         self.data = self.nodeValue = data
   1135 
   1136     def writexml(self, writer, indent="", addindent="", newl=""):
   1137         if "--" in self.data:
   1138             raise ValueError("'--' is not allowed in a comment node")
   1139         writer.write("%s<!--%s-->%s" % (indent, self.data, newl))
   1140 
   1141 
   1142 class CDATASection(Text):
   1143     # Make sure we don't add an instance __dict__ if we don't already
   1144     # have one, at least when that's possible:
   1145     # XXX this does not work, Text is an old-style class
   1146     # __slots__ = ()
   1147 
   1148     nodeType = Node.CDATA_SECTION_NODE
   1149     nodeName = "#cdata-section"
   1150 
   1151     def writexml(self, writer, indent="", addindent="", newl=""):
   1152         if self.data.find("]]>") >= 0:
   1153             raise ValueError("']]>' not allowed in a CDATA section")
   1154         writer.write("<![CDATA[%s]]>" % self.data)
   1155 
   1156 
   1157 class ReadOnlySequentialNamedNodeMap(object):
   1158     __slots__ = '_seq',
   1159 
   1160     def __init__(self, seq=()):
   1161         # seq should be a list or tuple
   1162         self._seq = seq
   1163 
   1164     def __len__(self):
   1165         return len(self._seq)
   1166 
   1167     def _get_length(self):
   1168         return len(self._seq)
   1169 
   1170     def getNamedItem(self, name):
   1171         for n in self._seq:
   1172             if n.nodeName == name:
   1173                 return n
   1174 
   1175     def getNamedItemNS(self, namespaceURI, localName):
   1176         for n in self._seq:
   1177             if n.namespaceURI == namespaceURI and n.localName == localName:
   1178                 return n
   1179 
   1180     def __getitem__(self, name_or_tuple):
   1181         if isinstance(name_or_tuple, tuple):
   1182             node = self.getNamedItemNS(*name_or_tuple)
   1183         else:
   1184             node = self.getNamedItem(name_or_tuple)
   1185         if node is None:
   1186             raise KeyError, name_or_tuple
   1187         return node
   1188 
   1189     def item(self, index):
   1190         if index < 0:
   1191             return None
   1192         try:
   1193             return self._seq[index]
   1194         except IndexError:
   1195             return None
   1196 
   1197     def removeNamedItem(self, name):
   1198         raise xml.dom.NoModificationAllowedErr(
   1199             "NamedNodeMap instance is read-only")
   1200 
   1201     def removeNamedItemNS(self, namespaceURI, localName):
   1202         raise xml.dom.NoModificationAllowedErr(
   1203             "NamedNodeMap instance is read-only")
   1204 
   1205     def setNamedItem(self, node):
   1206         raise xml.dom.NoModificationAllowedErr(
   1207             "NamedNodeMap instance is read-only")
   1208 
   1209     def setNamedItemNS(self, node):
   1210         raise xml.dom.NoModificationAllowedErr(
   1211             "NamedNodeMap instance is read-only")
   1212 
   1213     def __getstate__(self):
   1214         return [self._seq]
   1215 
   1216     def __setstate__(self, state):
   1217         self._seq = state[0]
   1218 
# Register the "length" property on ReadOnlySequentialNamedNodeMap.
# NOTE(review): presumably served by _get_length via defproperty (not
# defined in this part of the file) -- confirm.
defproperty(ReadOnlySequentialNamedNodeMap, "length",
            doc="Number of entries in the NamedNodeMap.")
   1221 
   1222 
   1223 class Identified:
   1224     """Mix-in class that supports the publicId and systemId attributes."""
   1225 
   1226     # XXX this does not work, this is an old-style class
   1227     # __slots__ = 'publicId', 'systemId'
   1228 
   1229     def _identified_mixin_init(self, publicId, systemId):
   1230         self.publicId = publicId
   1231         self.systemId = systemId
   1232 
   1233     def _get_publicId(self):
   1234         return self.publicId
   1235 
   1236     def _get_systemId(self):
   1237         return self.systemId
   1238 
   1239 class DocumentType(Identified, Childless, Node):
   1240     nodeType = Node.DOCUMENT_TYPE_NODE
   1241     nodeValue = None
   1242     name = None
   1243     publicId = None
   1244     systemId = None
   1245     internalSubset = None
   1246 
   1247     def __init__(self, qualifiedName):
   1248         self.entities = ReadOnlySequentialNamedNodeMap()
   1249         self.notations = ReadOnlySequentialNamedNodeMap()
   1250         if qualifiedName:
   1251             prefix, localname = _nssplit(qualifiedName)
   1252             self.name = localname
   1253         self.nodeName = self.name
   1254 
   1255     def _get_internalSubset(self):
   1256         return self.internalSubset
   1257 
   1258     def cloneNode(self, deep):
   1259         if self.ownerDocument is None:
   1260             # it's ok
   1261             clone = DocumentType(None)
   1262             clone.name = self.name
   1263             clone.nodeName = self.name
   1264             operation = xml.dom.UserDataHandler.NODE_CLONED
   1265             if deep:
   1266                 clone.entities._seq = []
   1267                 clone.notations._seq = []
   1268                 for n in self.notations._seq:
   1269                     notation = Notation(n.nodeName, n.publicId, n.systemId)
   1270                     clone.notations._seq.append(notation)
   1271                     n._call_user_data_handler(operation, n, notation)
   1272                 for e in self.entities._seq:
   1273                     entity = Entity(e.nodeName, e.publicId, e.systemId,
   1274                                     e.notationName)
   1275                     entity.actualEncoding = e.actualEncoding
   1276                     entity.encoding = e.encoding
   1277                     entity.version = e.version
   1278                     clone.entities._seq.append(entity)
   1279                     e._call_user_data_handler(operation, n, entity)
   1280             self._call_user_data_handler(operation, self, clone)
   1281             return clone
   1282         else:
   1283             return None
   1284 
   1285     def writexml(self, writer, indent="", addindent="", newl=""):
   1286         writer.write("<!DOCTYPE ")
   1287         writer.write(self.name)
   1288         if self.publicId:
   1289             writer.write("%s  PUBLIC '%s'%s  '%s'"
   1290                          % (newl, self.publicId, newl, self.systemId))
   1291         elif self.systemId:
   1292             writer.write("%s  SYSTEM '%s'" % (newl, self.systemId))
   1293         if self.internalSubset is not None:
   1294             writer.write(" [")
   1295             writer.write(self.internalSubset)
   1296             writer.write("]")
   1297         writer.write(">"+newl)
   1298 
   1299 class Entity(Identified, Node):
   1300     attributes = None
   1301     nodeType = Node.ENTITY_NODE
   1302     nodeValue = None
   1303 
   1304     actualEncoding = None
   1305     encoding = None
   1306     version = None
   1307 
   1308     def __init__(self, name, publicId, systemId, notation):
   1309         self.nodeName = name
   1310         self.notationName = notation
   1311         self.childNodes = NodeList()
   1312         self._identified_mixin_init(publicId, systemId)
   1313 
   1314     def _get_actualEncoding(self):
   1315         return self.actualEncoding
   1316 
   1317     def _get_encoding(self):
   1318         return self.encoding
   1319 
   1320     def _get_version(self):
   1321         return self.version
   1322 
   1323     def appendChild(self, newChild):
   1324         raise xml.dom.HierarchyRequestErr(
   1325             "cannot append children to an entity node")
   1326 
   1327     def insertBefore(self, newChild, refChild):
   1328         raise xml.dom.HierarchyRequestErr(
   1329             "cannot insert children below an entity node")
   1330 
   1331     def removeChild(self, oldChild):
   1332         raise xml.dom.HierarchyRequestErr(
   1333             "cannot remove children from an entity node")
   1334 
   1335     def replaceChild(self, newChild, oldChild):
   1336         raise xml.dom.HierarchyRequestErr(
   1337             "cannot replace children of an entity node")
   1338 
   1339 class Notation(Identified, Childless, Node):
   1340     nodeType = Node.NOTATION_NODE
   1341     nodeValue = None
   1342 
   1343     def __init__(self, name, publicId, systemId):
   1344         self.nodeName = name
   1345         self._identified_mixin_init(publicId, systemId)
   1346 
   1347 
   1348 class DOMImplementation(DOMImplementationLS):
   1349     _features = [("core", "1.0"),
   1350                  ("core", "2.0"),
   1351                  ("core", None),
   1352                  ("xml", "1.0"),
   1353                  ("xml", "2.0"),
   1354                  ("xml", None),
   1355                  ("ls-load", "3.0"),
   1356                  ("ls-load", None),
   1357                  ]
   1358 
   1359     def hasFeature(self, feature, version):
   1360         if version == "":
   1361             version = None
   1362         return (feature.lower(), version) in self._features
   1363 
   1364     def createDocument(self, namespaceURI, qualifiedName, doctype):
   1365         if doctype and doctype.parentNode is not None:
   1366             raise xml.dom.WrongDocumentErr(
   1367                 "doctype object owned by another DOM tree")
   1368         doc = self._create_document()
   1369 
   1370         add_root_element = not (namespaceURI is None
   1371                                 and qualifiedName is None
   1372                                 and doctype is None)
   1373 
   1374         if not qualifiedName and add_root_element:
   1375             # The spec is unclear what to raise here; SyntaxErr
   1376             # would be the other obvious candidate. Since Xerces raises
   1377             # InvalidCharacterErr, and since SyntaxErr is not listed
   1378             # for createDocument, that seems to be the better choice.
   1379             # XXX: need to check for illegal characters here and in
   1380             # createElement.
   1381 
   1382             # DOM Level III clears this up when talking about the return value
   1383             # of this function.  If namespaceURI, qName and DocType are
   1384             # Null the document is returned without a document element
   1385             # Otherwise if doctype or namespaceURI are not None
   1386             # Then we go back to the above problem
   1387             raise xml.dom.InvalidCharacterErr("Element with no name")
   1388 
   1389         if add_root_element:
   1390             prefix, localname = _nssplit(qualifiedName)
   1391             if prefix == "xml" \
   1392                and namespaceURI != "http://www.w3.org/XML/1998/namespace":
   1393                 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix")
   1394             if prefix and not namespaceURI:
   1395                 raise xml.dom.NamespaceErr(
   1396                     "illegal use of prefix without namespaces")
   1397             element = doc.createElementNS(namespaceURI, qualifiedName)
   1398             if doctype:
   1399                 doc.appendChild(doctype)
   1400             doc.appendChild(element)
   1401 
   1402         if doctype:
   1403             doctype.parentNode = doctype.ownerDocument = doc
   1404 
   1405         doc.doctype = doctype
   1406         doc.implementation = self
   1407         return doc
   1408 
   1409     def createDocumentType(self, qualifiedName, publicId, systemId):
   1410         doctype = DocumentType(qualifiedName)
   1411         doctype.publicId = publicId
   1412         doctype.systemId = systemId
   1413         return doctype
   1414 
   1415     # DOM Level 3 (WD 9 April 2002)
   1416 
   1417     def getInterface(self, feature):
   1418         if self.hasFeature(feature, None):
   1419             return self
   1420         else:
   1421             return None
   1422 
   1423     # internal
   1424     def _create_document(self):
   1425         return Document()
   1426 
   1427 class ElementInfo(object):
   1428     """Object that represents content-model information for an element.
   1429 
   1430     This implementation is not expected to be used in practice; DOM
   1431     builders should provide implementations which do the right thing
   1432     using information available to it.
   1433 
   1434     """
   1435 
   1436     __slots__ = 'tagName',
   1437 
   1438     def __init__(self, name):
   1439         self.tagName = name
   1440 
   1441     def getAttributeType(self, aname):
   1442         return _no_type
   1443 
   1444     def getAttributeTypeNS(self, namespaceURI, localName):
   1445         return _no_type
   1446 
   1447     def isElementContent(self):
   1448         return False
   1449 
   1450     def isEmpty(self):
   1451         """Returns true iff this element is declared to have an EMPTY
   1452         content model."""
   1453         return False
   1454 
   1455     def isId(self, aname):
   1456         """Returns true iff the named attribute is a DTD-style ID."""
   1457         return False
   1458 
   1459     def isIdNS(self, namespaceURI, localName):
   1460         """Returns true iff the identified attribute is a DTD-style ID."""
   1461         return False
   1462 
   1463     def __getstate__(self):
   1464         return self.tagName
   1465 
   1466     def __setstate__(self, state):
   1467         self.tagName = state
   1468 
   1469 def _clear_id_cache(node):
   1470     if node.nodeType == Node.DOCUMENT_NODE:
   1471         node._id_cache.clear()
   1472         node._id_search_stack = None
   1473     elif _in_document(node):
   1474         node.ownerDocument._id_cache.clear()
   1475         node.ownerDocument._id_search_stack= None
   1476 
   1477 class Document(Node, DocumentLS):
   1478     _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE,
   1479                          Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE)
   1480 
   1481     nodeType = Node.DOCUMENT_NODE
   1482     nodeName = "#document"
   1483     nodeValue = None
   1484     attributes = None
   1485     doctype = None
   1486     parentNode = None
   1487     previousSibling = nextSibling = None
   1488 
   1489     implementation = DOMImplementation()
   1490 
   1491     # Document attributes from Level 3 (WD 9 April 2002)
   1492 
   1493     actualEncoding = None
   1494     encoding = None
   1495     standalone = None
   1496     version = None
   1497     strictErrorChecking = False
   1498     errorHandler = None
   1499     documentURI = None
   1500 
   1501     _magic_id_count = 0
   1502 
   1503     def __init__(self):
   1504         self.childNodes = NodeList()
   1505         # mapping of (namespaceURI, localName) -> ElementInfo
   1506         #        and tagName -> ElementInfo
   1507         self._elem_info = {}
   1508         self._id_cache = {}
   1509         self._id_search_stack = None
   1510 
   1511     def _get_elem_info(self, element):
   1512         if element.namespaceURI:
   1513             key = element.namespaceURI, element.localName
   1514         else:
   1515             key = element.tagName
   1516         return self._elem_info.get(key)
   1517 
   1518     def _get_actualEncoding(self):
   1519         return self.actualEncoding
   1520 
   1521     def _get_doctype(self):
   1522         return self.doctype
   1523 
   1524     def _get_documentURI(self):
   1525         return self.documentURI
   1526 
   1527     def _get_encoding(self):
   1528         return self.encoding
   1529 
   1530     def _get_errorHandler(self):
   1531         return self.errorHandler
   1532 
   1533     def _get_standalone(self):
   1534         return self.standalone
   1535 
   1536     def _get_strictErrorChecking(self):
   1537         return self.strictErrorChecking
   1538 
   1539     def _get_version(self):
   1540         return self.version
   1541 
   1542     def appendChild(self, node):
   1543         if node.nodeType not in self._child_node_types:
   1544             raise xml.dom.HierarchyRequestErr(
   1545                 "%s cannot be child of %s" % (repr(node), repr(self)))
   1546         if node.parentNode is not None:
   1547             # This needs to be done before the next test since this
   1548             # may *be* the document element, in which case it should
   1549             # end up re-ordered to the end.
   1550             node.parentNode.removeChild(node)
   1551 
   1552         if node.nodeType == Node.ELEMENT_NODE \
   1553            and self._get_documentElement():
   1554             raise xml.dom.HierarchyRequestErr(
   1555                 "two document elements disallowed")
   1556         return Node.appendChild(self, node)
   1557 
   1558     def removeChild(self, oldChild):
   1559         try:
   1560             self.childNodes.remove(oldChild)
   1561         except ValueError:
   1562             raise xml.dom.NotFoundErr()
   1563         oldChild.nextSibling = oldChild.previousSibling = None
   1564         oldChild.parentNode = None
   1565         if self.documentElement is oldChild:
   1566             self.documentElement = None
   1567 
   1568         return oldChild
   1569 
   1570     def _get_documentElement(self):
   1571         for node in self.childNodes:
   1572             if node.nodeType == Node.ELEMENT_NODE:
   1573                 return node
   1574 
   1575     def unlink(self):
   1576         if self.doctype is not None:
   1577             self.doctype.unlink()
   1578             self.doctype = None
   1579         Node.unlink(self)
   1580 
   1581     def cloneNode(self, deep):
   1582         if not deep:
   1583             return None
   1584         clone = self.implementation.createDocument(None, None, None)
   1585         clone.encoding = self.encoding
   1586         clone.standalone = self.standalone
   1587         clone.version = self.version
   1588         for n in self.childNodes:
   1589             childclone = _clone_node(n, deep, clone)
   1590             assert childclone.ownerDocument.isSameNode(clone)
   1591             clone.childNodes.append(childclone)
   1592             if childclone.nodeType == Node.DOCUMENT_NODE:
   1593                 assert clone.documentElement is None
   1594             elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE:
   1595                 assert clone.doctype is None
   1596                 clone.doctype = childclone
   1597             childclone.parentNode = clone
   1598         self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED,
   1599                                      self, clone)
   1600         return clone
   1601 
   1602     def createDocumentFragment(self):
   1603         d = DocumentFragment()
   1604         d.ownerDocument = self
   1605         return d
   1606 
   1607     def createElement(self, tagName):
   1608         e = Element(tagName)
   1609         e.ownerDocument = self
   1610         return e
   1611 
   1612     def createTextNode(self, data):
   1613         if not isinstance(data, StringTypes):
   1614             raise TypeError, "node contents must be a string"
   1615         t = Text()
   1616         t.data = data
   1617         t.ownerDocument = self
   1618         return t
   1619 
   1620     def createCDATASection(self, data):
   1621         if not isinstance(data, StringTypes):
   1622             raise TypeError, "node contents must be a string"
   1623         c = CDATASection()
   1624         c.data = data
   1625         c.ownerDocument = self
   1626         return c
   1627 
   1628     def createComment(self, data):
   1629         c = Comment(data)
   1630         c.ownerDocument = self
   1631         return c
   1632 
   1633     def createProcessingInstruction(self, target, data):
   1634         p = ProcessingInstruction(target, data)
   1635         p.ownerDocument = self
   1636         return p
   1637 
   1638     def createAttribute(self, qName):
   1639         a = Attr(qName)
   1640         a.ownerDocument = self
   1641         a.value = ""
   1642         return a
   1643 
   1644     def createElementNS(self, namespaceURI, qualifiedName):
   1645         prefix, localName = _nssplit(qualifiedName)
   1646         e = Element(qualifiedName, namespaceURI, prefix)
   1647         e.ownerDocument = self
   1648         return e
   1649 
   1650     def createAttributeNS(self, namespaceURI, qualifiedName):
   1651         prefix, localName = _nssplit(qualifiedName)
   1652         a = Attr(qualifiedName, namespaceURI, localName, prefix)
   1653         a.ownerDocument = self
   1654         a.value = ""
   1655         return a
   1656 
   1657     # A couple of implementation-specific helpers to create node types
   1658     # not supported by the W3C DOM specs:
   1659 
   1660     def _create_entity(self, name, publicId, systemId, notationName):
   1661         e = Entity(name, publicId, systemId, notationName)
   1662         e.ownerDocument = self
   1663         return e
   1664 
   1665     def _create_notation(self, name, publicId, systemId):
   1666         n = Notation(name, publicId, systemId)
   1667         n.ownerDocument = self
   1668         return n
   1669 
    def getElementById(self, id):
        """Return the element that has an ID attribute equal to `id`, or None.

        Every ID attribute value met during the walk is recorded in
        self._id_cache, so later lookups for those values return
        immediately.  The traversal stack is stored on the document as
        self._id_search_stack and is mutated in place: when a match is
        found the loop stops with the unvisited nodes still on the stack,
        and the next call carries on from there instead of starting over.
        """
        # Fast path: this ID value was already seen by an earlier search.
        if id in self._id_cache:
            return self._id_cache[id]
        # Neither element-type info nor a magic-ID count is present, so
        # the search is skipped.  (Presumably _magic_id_count tracks
        # attributes flagged through Element.setIdAttribute*() -- confirm.)
        if not (self._elem_info or self._magic_id_count):
            return None

        stack = self._id_search_stack
        if stack is None:
            # we never searched before, or the cache has been cleared
            # NOTE(review): if documentElement is None, node.childNodes in
            # the loop below would raise AttributeError -- confirm whether
            # that state is reachable here.
            stack = [self.documentElement]
            self._id_search_stack = stack
        elif not stack:
            # Previous search was completed and cache is still valid;
            # no matching node.
            return None

        result = None
        while stack:
            node = stack.pop()
            # add child elements to stack for continued searching
            stack.extend([child for child in node.childNodes
                          if child.nodeType in _nodeTypes_with_children])
            # check this node
            info = self._get_elem_info(node)
            if info:
                # We have to process all ID attributes before
                # returning in order to get all the attributes set to
                # be IDs using Element.setIdAttribute*().
                for attr in node.attributes.values():
                    if attr.namespaceURI:
                        # Namespaced attribute: ask the element info by
                        # (namespaceURI, localName).
                        if info.isIdNS(attr.namespaceURI, attr.localName):
                            self._id_cache[attr.value] = node
                            if attr.value == id:
                                result = node
                            elif not node._magic_id_nodes:
                                # Non-matching ID found and the node has
                                # no magic IDs: stop scanning attributes.
                                break
                    elif info.isId(attr.name):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            break
                    elif attr._is_id:
                        # Attribute carries its own _is_id flag rather
                        # than being declared through the element info.
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif node._magic_id_nodes == 1:
                            # This was the node's only magic ID.
                            break
            elif node._magic_id_nodes:
                # No element info: only flagged attributes can be IDs.
                for attr in node.attributes.values():
                    if attr._is_id:
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
            if result is not None:
                # Leave the rest of the stack in place for the next call.
                break
        return result
   1727 
   1728     def getElementsByTagName(self, name):
   1729         return _get_elements_by_tagName_helper(self, name, NodeList())
   1730 
   1731     def getElementsByTagNameNS(self, namespaceURI, localName):
   1732         return _get_elements_by_tagName_ns_helper(
   1733             self, namespaceURI, localName, NodeList())
   1734 
   1735     def isSupported(self, feature, version):
   1736         return self.implementation.hasFeature(feature, version)
   1737 
   1738     def importNode(self, node, deep):
   1739         if node.nodeType == Node.DOCUMENT_NODE:
   1740             raise xml.dom.NotSupportedErr("cannot import document nodes")
   1741         elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1742             raise xml.dom.NotSupportedErr("cannot import document type nodes")
   1743         return _clone_node(node, deep, self)
   1744 
   1745     def writexml(self, writer, indent="", addindent="", newl="",
   1746                  encoding = None):
   1747         if encoding is None:
   1748             writer.write('<?xml version="1.0" ?>'+newl)
   1749         else:
   1750             writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl))
   1751         for node in self.childNodes:
   1752             node.writexml(writer, indent, addindent, newl)
   1753 
   1754     # DOM Level 3 (WD 9 April 2002)
   1755 
   1756     def renameNode(self, n, namespaceURI, name):
   1757         if n.ownerDocument is not self:
   1758             raise xml.dom.WrongDocumentErr(
   1759                 "cannot rename nodes from other documents;\n"
   1760                 "expected %s,\nfound %s" % (self, n.ownerDocument))
   1761         if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE):
   1762             raise xml.dom.NotSupportedErr(
   1763                 "renameNode() only applies to element and attribute nodes")
   1764         if namespaceURI != EMPTY_NAMESPACE:
   1765             if ':' in name:
   1766                 prefix, localName = name.split(':', 1)
   1767                 if (  prefix == "xmlns"
   1768                       and namespaceURI != xml.dom.XMLNS_NAMESPACE):
   1769                     raise xml.dom.NamespaceErr(
   1770                         "illegal use of 'xmlns' prefix")
   1771             else:
   1772                 if (  name == "xmlns"
   1773                       and namespaceURI != xml.dom.XMLNS_NAMESPACE
   1774                       and n.nodeType == Node.ATTRIBUTE_NODE):
   1775                     raise xml.dom.NamespaceErr(
   1776                         "illegal use of the 'xmlns' attribute")
   1777                 prefix = None
   1778                 localName = name
   1779         else:
   1780             prefix = None
   1781             localName = None
   1782         if n.nodeType == Node.ATTRIBUTE_NODE:
   1783             element = n.ownerElement
   1784             if element is not None:
   1785                 is_id = n._is_id
   1786                 element.removeAttributeNode(n)
   1787         else:
   1788             element = None
   1789         # avoid __setattr__
   1790         d = n.__dict__
   1791         d['prefix'] = prefix
   1792         d['localName'] = localName
   1793         d['namespaceURI'] = namespaceURI
   1794         d['nodeName'] = name
   1795         if n.nodeType == Node.ELEMENT_NODE:
   1796             d['tagName'] = name
   1797         else:
   1798             # attribute node
   1799             d['name'] = name
   1800             if element is not None:
   1801                 element.setAttributeNode(n)
   1802                 if is_id:
   1803                     element.setIdAttributeNode(n)
   1804         # It's not clear from a semantic perspective whether we should
   1805         # call the user data handlers for the NODE_RENAMED event since
   1806         # we're re-using the existing node.  The draft spec has been
   1807         # interpreted as meaning "no, don't call the handler unless a
   1808         # new node is created."
   1809         return n
   1810 
# Publish "documentElement" on the Document class through defproperty().
# NOTE(review): defproperty is not defined in this part of the file; it
# presumably comes from the xml.dom.minicompat star-import and wires the
# property to Document._get_documentElement -- confirm there.
defproperty(Document, "documentElement",
            doc="Top-level element of this document.")
   1813 
   1814 
   1815 def _clone_node(node, deep, newOwnerDocument):
   1816     """
   1817     Clone a node and give it the new owner document.
   1818     Called by Node.cloneNode and Document.importNode
   1819     """
   1820     if node.ownerDocument.isSameNode(newOwnerDocument):
   1821         operation = xml.dom.UserDataHandler.NODE_CLONED
   1822     else:
   1823         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1824     if node.nodeType == Node.ELEMENT_NODE:
   1825         clone = newOwnerDocument.createElementNS(node.namespaceURI,
   1826                                                  node.nodeName)
   1827         for attr in node.attributes.values():
   1828             clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value)
   1829             a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName)
   1830             a.specified = attr.specified
   1831 
   1832         if deep:
   1833             for child in node.childNodes:
   1834                 c = _clone_node(child, deep, newOwnerDocument)
   1835                 clone.appendChild(c)
   1836 
   1837     elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
   1838         clone = newOwnerDocument.createDocumentFragment()
   1839         if deep:
   1840             for child in node.childNodes:
   1841                 c = _clone_node(child, deep, newOwnerDocument)
   1842                 clone.appendChild(c)
   1843 
   1844     elif node.nodeType == Node.TEXT_NODE:
   1845         clone = newOwnerDocument.createTextNode(node.data)
   1846     elif node.nodeType == Node.CDATA_SECTION_NODE:
   1847         clone = newOwnerDocument.createCDATASection(node.data)
   1848     elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
   1849         clone = newOwnerDocument.createProcessingInstruction(node.target,
   1850                                                              node.data)
   1851     elif node.nodeType == Node.COMMENT_NODE:
   1852         clone = newOwnerDocument.createComment(node.data)
   1853     elif node.nodeType == Node.ATTRIBUTE_NODE:
   1854         clone = newOwnerDocument.createAttributeNS(node.namespaceURI,
   1855                                                    node.nodeName)
   1856         clone.specified = True
   1857         clone.value = node.value
   1858     elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
   1859         assert node.ownerDocument is not newOwnerDocument
   1860         operation = xml.dom.UserDataHandler.NODE_IMPORTED
   1861         clone = newOwnerDocument.implementation.createDocumentType(
   1862             node.name, node.publicId, node.systemId)
   1863         clone.ownerDocument = newOwnerDocument
   1864         if deep:
   1865             clone.entities._seq = []
   1866             clone.notations._seq = []
   1867             for n in node.notations._seq:
   1868                 notation = Notation(n.nodeName, n.publicId, n.systemId)
   1869                 notation.ownerDocument = newOwnerDocument
   1870                 clone.notations._seq.append(notation)
   1871                 if hasattr(n, '_call_user_data_handler'):
   1872                     n._call_user_data_handler(operation, n, notation)
   1873             for e in node.entities._seq:
   1874                 entity = Entity(e.nodeName, e.publicId, e.systemId,
   1875                                 e.notationName)
   1876                 entity.actualEncoding = e.actualEncoding
   1877                 entity.encoding = e.encoding
   1878                 entity.version = e.version
   1879                 entity.ownerDocument = newOwnerDocument
   1880                 clone.entities._seq.append(entity)
   1881                 if hasattr(e, '_call_user_data_handler'):
   1882                     e._call_user_data_handler(operation, n, entity)
   1883     else:
   1884         # Note the cloning of Document and DocumentType nodes is
   1885         # implementation specific.  minidom handles those cases
   1886         # directly in the cloneNode() methods.
   1887         raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node))
   1888 
   1889     # Check for _call_user_data_handler() since this could conceivably
   1890     # used with other DOM implementations (one of the FourThought
   1891     # DOMs, perhaps?).
   1892     if hasattr(node, '_call_user_data_handler'):
   1893         node._call_user_data_handler(operation, node, clone)
   1894     return clone
   1895 
   1896 
   1897 def _nssplit(qualifiedName):
   1898     fields = qualifiedName.split(':', 1)
   1899     if len(fields) == 2:
   1900         return fields
   1901     else:
   1902         return (None, fields[0])
   1903 
   1904 
   1905 def _get_StringIO():
   1906     # we can't use cStringIO since it doesn't support Unicode strings
   1907     from StringIO import StringIO
   1908     return StringIO()
   1909 
   1910 def _do_pulldom_parse(func, args, kwargs):
   1911     events = func(*args, **kwargs)
   1912     toktype, rootNode = events.getEvent()
   1913     events.expandNode(rootNode)
   1914     events.clear()
   1915     return rootNode
   1916 
   1917 def parse(file, parser=None, bufsize=None):
   1918     """Parse a file into a DOM by filename or file object."""
   1919     if parser is None and not bufsize:
   1920         from xml.dom import expatbuilder
   1921         return expatbuilder.parse(file)
   1922     else:
   1923         from xml.dom import pulldom
   1924         return _do_pulldom_parse(pulldom.parse, (file,),
   1925             {'parser': parser, 'bufsize': bufsize})
   1926 
   1927 def parseString(string, parser=None):
   1928     """Parse a file into a DOM from a string."""
   1929     if parser is None:
   1930         from xml.dom import expatbuilder
   1931         return expatbuilder.parseString(string)
   1932     else:
   1933         from xml.dom import pulldom
   1934         return _do_pulldom_parse(pulldom.parseString, (string,),
   1935                                  {'parser': parser})
   1936 
   1937 def getDOMImplementation(features=None):
   1938     if features:
   1939         if isinstance(features, StringTypes):
   1940             features = domreg._parse_feature_string(features)
   1941         for f, v in features:
   1942             if not Document.implementation.hasFeature(f, v):
   1943                 return None
   1944     return Document.implementation
   1945