1 """Simple implementation of the Level 1 DOM. 2 3 Namespaces and other minor Level 2 features are also supported. 4 5 parse("foo.xml") 6 7 parseString("<foo><bar/></foo>") 8 9 Todo: 10 ===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16 """ 17 18 import xml.dom 19 20 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 21 from xml.dom.minicompat import * 22 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 23 24 # This is used by the ID-cache invalidation checks; the list isn't 25 # actually complete, since the nodes being checked will never be the 26 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 27 # the node being added or removed, not the node being modified.) 28 # 29 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 30 xml.dom.Node.ENTITY_REFERENCE_NODE) 31 32 33 class Node(xml.dom.Node): 34 namespaceURI = None # this is non-null only for elements and attributes 35 parentNode = None 36 ownerDocument = None 37 nextSibling = None 38 previousSibling = None 39 40 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 41 42 def __nonzero__(self): 43 return True 44 45 def toxml(self, encoding = None): 46 return self.toprettyxml("", "", encoding) 47 48 def toprettyxml(self, indent="\t", newl="\n", encoding = None): 49 # indent = the indentation string to prepend, per level 50 # newl = the newline string to append 51 writer = _get_StringIO() 52 if encoding is not None: 53 import codecs 54 # Can't use codecs.getwriter to preserve 2.0 compatibility 55 writer = codecs.lookup(encoding)[3](writer) 56 if self.nodeType == Node.DOCUMENT_NODE: 57 # Can pass encoding only to document, to put it into XML header 58 self.writexml(writer, "", indent, newl, encoding) 59 else: 60 self.writexml(writer, "", indent, newl) 61 return writer.getvalue() 62 63 def hasChildNodes(self): 64 if self.childNodes: 65 return True 66 else: 67 return False 68 69 def _get_childNodes(self): 70 return self.childNodes 71 72 def _get_firstChild(self): 73 if self.childNodes: 74 return self.childNodes[0] 75 76 def _get_lastChild(self): 77 if self.childNodes: 78 return self.childNodes[-1] 79 80 def insertBefore(self, newChild, refChild): 81 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 82 for c in tuple(newChild.childNodes): 83 self.insertBefore(c, refChild) 84 ### The DOM does not clearly specify what to return in this case 85 return newChild 86 if newChild.nodeType not in self._child_node_types: 87 raise xml.dom.HierarchyRequestErr( 88 "%s cannot be child of %s" % (repr(newChild), repr(self))) 89 if newChild.parentNode is not None: 90 newChild.parentNode.removeChild(newChild) 91 if refChild is None: 92 self.appendChild(newChild) 93 else: 94 try: 95 index = self.childNodes.index(refChild) 96 except ValueError: 97 raise xml.dom.NotFoundErr() 98 if newChild.nodeType in _nodeTypes_with_children: 99 _clear_id_cache(self) 100 self.childNodes.insert(index, newChild) 101 newChild.nextSibling = refChild 102 refChild.previousSibling = newChild 103 if index: 104 node = self.childNodes[index-1] 105 node.nextSibling = newChild 106 newChild.previousSibling = node 107 else: 108 newChild.previousSibling = None 109 newChild.parentNode = self 110 return newChild 111 112 def appendChild(self, node): 113 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 114 for c in tuple(node.childNodes): 115 self.appendChild(c) 116 ### The DOM does not clearly specify what to return in this case 117 return node 118 if node.nodeType not in self._child_node_types: 119 raise xml.dom.HierarchyRequestErr( 120 "%s cannot be child of %s" % (repr(node), repr(self))) 121 elif node.nodeType in _nodeTypes_with_children: 122 _clear_id_cache(self) 123 if node.parentNode is not None: 124 node.parentNode.removeChild(node) 125 _append_child(self, node) 126 node.nextSibling = None 127 return node 128 129 def replaceChild(self, newChild, oldChild): 130 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 131 refChild = oldChild.nextSibling 132 self.removeChild(oldChild) 133 return self.insertBefore(newChild, refChild) 134 if newChild.nodeType not in self._child_node_types: 135 raise xml.dom.HierarchyRequestErr( 136 "%s cannot be child of %s" % (repr(newChild), repr(self))) 137 if newChild is oldChild: 138 return 139 if newChild.parentNode is not None: 140 newChild.parentNode.removeChild(newChild) 141 try: 142 index = self.childNodes.index(oldChild) 143 except ValueError: 144 raise xml.dom.NotFoundErr() 145 self.childNodes[index] = newChild 146 newChild.parentNode = self 147 oldChild.parentNode = None 148 if (newChild.nodeType in _nodeTypes_with_children 149 or oldChild.nodeType in _nodeTypes_with_children): 150 _clear_id_cache(self) 151 newChild.nextSibling = oldChild.nextSibling 152 newChild.previousSibling = oldChild.previousSibling 153 oldChild.nextSibling = None 154 oldChild.previousSibling = None 155 if newChild.previousSibling: 156 newChild.previousSibling.nextSibling = newChild 157 if newChild.nextSibling: 158 newChild.nextSibling.previousSibling = newChild 159 return oldChild 160 161 def removeChild(self, oldChild): 162 try: 163 self.childNodes.remove(oldChild) 164 except ValueError: 165 raise xml.dom.NotFoundErr() 166 if oldChild.nextSibling is not None: 167 oldChild.nextSibling.previousSibling = oldChild.previousSibling 168 if oldChild.previousSibling is not None: 169 oldChild.previousSibling.nextSibling = oldChild.nextSibling 170 oldChild.nextSibling = oldChild.previousSibling = None 171 if oldChild.nodeType in _nodeTypes_with_children: 172 _clear_id_cache(self) 173 174 oldChild.parentNode = None 175 return oldChild 176 177 def normalize(self): 178 L = [] 179 for child in self.childNodes: 180 if child.nodeType == Node.TEXT_NODE: 181 if not child.data: 182 # empty text node; discard 183 if L: 184 L[-1].nextSibling = child.nextSibling 185 if child.nextSibling: 186 child.nextSibling.previousSibling = child.previousSibling 187 child.unlink() 188 elif L and L[-1].nodeType == child.nodeType: 189 # collapse text node 190 node = L[-1] 191 node.data = node.data + child.data 192 node.nextSibling = child.nextSibling 193 if child.nextSibling: 194 child.nextSibling.previousSibling = node 195 child.unlink() 196 else: 197 L.append(child) 198 else: 199 L.append(child) 200 if child.nodeType == Node.ELEMENT_NODE: 201 child.normalize() 202 self.childNodes[:] = L 203 204 def cloneNode(self, deep): 205 return _clone_node(self, deep, self.ownerDocument or self) 206 207 def isSupported(self, feature, version): 208 return self.ownerDocument.implementation.hasFeature(feature, version) 209 210 def _get_localName(self): 211 # Overridden in Element and Attr where localName can be Non-Null 212 return None 213 214 # Node interfaces from Level 3 (WD 9 April 2002) 215 216 def isSameNode(self, other): 217 return self is other 218 219 def getInterface(self, feature): 220 if self.isSupported(feature, None): 221 return self 222 else: 223 return None 224 225 # The "user data" functions use a dictionary that is only present 226 # if some user data has been set, so be careful not to assume it 227 # exists. 228 229 def getUserData(self, key): 230 try: 231 return self._user_data[key][0] 232 except (AttributeError, KeyError): 233 return None 234 235 def setUserData(self, key, data, handler): 236 old = None 237 try: 238 d = self._user_data 239 except AttributeError: 240 d = {} 241 self._user_data = d 242 if key in d: 243 old = d[key][0] 244 if data is None: 245 # ignore handlers passed for None 246 handler = None 247 if old is not None: 248 del d[key] 249 else: 250 d[key] = (data, handler) 251 return old 252 253 def _call_user_data_handler(self, operation, src, dst): 254 if hasattr(self, "_user_data"): 255 for key, (data, handler) in self._user_data.items(): 256 if handler is not None: 257 handler.handle(operation, key, data, src, dst) 258 259 # minidom-specific API: 260 261 def unlink(self): 262 self.parentNode = self.ownerDocument = None 263 if self.childNodes: 264 for child in self.childNodes: 265 child.unlink() 266 self.childNodes = NodeList() 267 self.previousSibling = None 268 self.nextSibling = None 269 270 defproperty(Node, "firstChild", doc="First child node, or None.") 271 defproperty(Node, "lastChild", doc="Last child node, or None.") 272 defproperty(Node, "localName", doc="Namespace-local name of this node.") 273 274 275 def _append_child(self, node): 276 # fast path with less checks; usable by DOM builders if careful 277 childNodes = self.childNodes 278 if childNodes: 279 last = childNodes[-1] 280 node.__dict__["previousSibling"] = last 281 last.__dict__["nextSibling"] = node 282 childNodes.append(node) 283 node.__dict__["parentNode"] = self 284 285 def _in_document(node): 286 # return True iff node is part of a document tree 287 while node is not None: 288 if node.nodeType == Node.DOCUMENT_NODE: 289 return True 290 node = node.parentNode 291 return False 292 293 def _write_data(writer, data): 294 "Writes datachars to writer." 295 if data: 296 data = data.replace("&", "&").replace("<", "<"). \ 297 replace("\"", """).replace(">", ">") 298 writer.write(data) 299 300 def _get_elements_by_tagName_helper(parent, name, rc): 301 for node in parent.childNodes: 302 if node.nodeType == Node.ELEMENT_NODE and \ 303 (name == "*" or node.tagName == name): 304 rc.append(node) 305 _get_elements_by_tagName_helper(node, name, rc) 306 return rc 307 308 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 309 for node in parent.childNodes: 310 if node.nodeType == Node.ELEMENT_NODE: 311 if ((localName == "*" or node.localName == localName) and 312 (nsURI == "*" or node.namespaceURI == nsURI)): 313 rc.append(node) 314 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 315 return rc 316 317 class DocumentFragment(Node): 318 nodeType = Node.DOCUMENT_FRAGMENT_NODE 319 nodeName = "#document-fragment" 320 nodeValue = None 321 attributes = None 322 parentNode = None 323 _child_node_types = (Node.ELEMENT_NODE, 324 Node.TEXT_NODE, 325 Node.CDATA_SECTION_NODE, 326 Node.ENTITY_REFERENCE_NODE, 327 Node.PROCESSING_INSTRUCTION_NODE, 328 Node.COMMENT_NODE, 329 Node.NOTATION_NODE) 330 331 def __init__(self): 332 self.childNodes = NodeList() 333 334 335 class Attr(Node): 336 nodeType = Node.ATTRIBUTE_NODE 337 attributes = None 338 ownerElement = None 339 specified = False 340 _is_id = False 341 342 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 343 344 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 345 prefix=None): 346 # skip setattr for performance 347 d = self.__dict__ 348 d["nodeName"] = d["name"] = qName 349 d["namespaceURI"] = namespaceURI 350 d["prefix"] = prefix 351 d['childNodes'] = NodeList() 352 353 # Add the single child node that represents the value of the attr 354 self.childNodes.append(Text()) 355 356 # nodeValue and value are set elsewhere 357 358 def _get_localName(self): 359 return self.nodeName.split(":", 1)[-1] 360 361 def _get_name(self): 362 return self.name 363 364 def _get_specified(self): 365 return self.specified 366 367 def __setattr__(self, name, value): 368 d = self.__dict__ 369 if name in ("value", "nodeValue"): 370 d["value"] = d["nodeValue"] = value 371 d2 = self.childNodes[0].__dict__ 372 d2["data"] = d2["nodeValue"] = value 373 if self.ownerElement is not None: 374 _clear_id_cache(self.ownerElement) 375 elif name in ("name", "nodeName"): 376 d["name"] = d["nodeName"] = value 377 if self.ownerElement is not None: 378 _clear_id_cache(self.ownerElement) 379 else: 380 d[name] = value 381 382 def _set_prefix(self, prefix): 383 nsuri = self.namespaceURI 384 if prefix == "xmlns": 385 if nsuri and nsuri != XMLNS_NAMESPACE: 386 raise xml.dom.NamespaceErr( 387 "illegal use of 'xmlns' prefix for the wrong namespace") 388 d = self.__dict__ 389 d['prefix'] = prefix 390 if prefix is None: 391 newName = self.localName 392 else: 393 newName = "%s:%s" % (prefix, self.localName) 394 if self.ownerElement: 395 _clear_id_cache(self.ownerElement) 396 d['nodeName'] = d['name'] = newName 397 398 def _set_value(self, value): 399 d = self.__dict__ 400 d['value'] = d['nodeValue'] = value 401 if self.ownerElement: 402 _clear_id_cache(self.ownerElement) 403 self.childNodes[0].data = value 404 405 def unlink(self): 406 # This implementation does not call the base implementation 407 # since most of that is not needed, and the expense of the 408 # method call is not warranted. We duplicate the removal of 409 # children, but that's all we needed from the base class. 410 elem = self.ownerElement 411 if elem is not None: 412 del elem._attrs[self.nodeName] 413 del elem._attrsNS[(self.namespaceURI, self.localName)] 414 if self._is_id: 415 self._is_id = False 416 elem._magic_id_nodes -= 1 417 self.ownerDocument._magic_id_count -= 1 418 for child in self.childNodes: 419 child.unlink() 420 del self.childNodes[:] 421 422 def _get_isId(self): 423 if self._is_id: 424 return True 425 doc = self.ownerDocument 426 elem = self.ownerElement 427 if doc is None or elem is None: 428 return False 429 430 info = doc._get_elem_info(elem) 431 if info is None: 432 return False 433 if self.namespaceURI: 434 return info.isIdNS(self.namespaceURI, self.localName) 435 else: 436 return info.isId(self.nodeName) 437 438 def _get_schemaType(self): 439 doc = self.ownerDocument 440 elem = self.ownerElement 441 if doc is None or elem is None: 442 return _no_type 443 444 info = doc._get_elem_info(elem) 445 if info is None: 446 return _no_type 447 if self.namespaceURI: 448 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 449 else: 450 return info.getAttributeType(self.nodeName) 451 452 defproperty(Attr, "isId", doc="True if this attribute is an ID.") 453 defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 454 defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 455 456 457 class NamedNodeMap(object): 458 """The attribute list is a transient interface to the underlying 459 dictionaries. Mutations here will change the underlying element's 460 dictionary. 461 462 Ordering is imposed artificially and does not reflect the order of 463 attributes as found in an input document. 464 """ 465 466 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 467 468 def __init__(self, attrs, attrsNS, ownerElement): 469 self._attrs = attrs 470 self._attrsNS = attrsNS 471 self._ownerElement = ownerElement 472 473 def _get_length(self): 474 return len(self._attrs) 475 476 def item(self, index): 477 try: 478 return self[self._attrs.keys()[index]] 479 except IndexError: 480 return None 481 482 def items(self): 483 L = [] 484 for node in self._attrs.values(): 485 L.append((node.nodeName, node.value)) 486 return L 487 488 def itemsNS(self): 489 L = [] 490 for node in self._attrs.values(): 491 L.append(((node.namespaceURI, node.localName), node.value)) 492 return L 493 494 def has_key(self, key): 495 if isinstance(key, StringTypes): 496 return key in self._attrs 497 else: 498 return key in self._attrsNS 499 500 def keys(self): 501 return self._attrs.keys() 502 503 def keysNS(self): 504 return self._attrsNS.keys() 505 506 def values(self): 507 return self._attrs.values() 508 509 def get(self, name, value=None): 510 return self._attrs.get(name, value) 511 512 __len__ = _get_length 513 514 __hash__ = None # Mutable type can't be correctly hashed 515 def __cmp__(self, other): 516 if self._attrs is getattr(other, "_attrs", None): 517 return 0 518 else: 519 return cmp(id(self), id(other)) 520 521 def __getitem__(self, attname_or_tuple): 522 if isinstance(attname_or_tuple, tuple): 523 return self._attrsNS[attname_or_tuple] 524 else: 525 return self._attrs[attname_or_tuple] 526 527 # same as set 528 def __setitem__(self, attname, value): 529 if isinstance(value, StringTypes): 530 try: 531 node = self._attrs[attname] 532 except KeyError: 533 node = Attr(attname) 534 node.ownerDocument = self._ownerElement.ownerDocument 535 self.setNamedItem(node) 536 node.value = value 537 else: 538 if not isinstance(value, Attr): 539 raise TypeError, "value must be a string or Attr object" 540 node = value 541 self.setNamedItem(node) 542 543 def getNamedItem(self, name): 544 try: 545 return self._attrs[name] 546 except KeyError: 547 return None 548 549 def getNamedItemNS(self, namespaceURI, localName): 550 try: 551 return self._attrsNS[(namespaceURI, localName)] 552 except KeyError: 553 return None 554 555 def removeNamedItem(self, name): 556 n = self.getNamedItem(name) 557 if n is not None: 558 _clear_id_cache(self._ownerElement) 559 del self._attrs[n.nodeName] 560 del self._attrsNS[(n.namespaceURI, n.localName)] 561 if 'ownerElement' in n.__dict__: 562 n.__dict__['ownerElement'] = None 563 return n 564 else: 565 raise xml.dom.NotFoundErr() 566 567 def removeNamedItemNS(self, namespaceURI, localName): 568 n = self.getNamedItemNS(namespaceURI, localName) 569 if n is not None: 570 _clear_id_cache(self._ownerElement) 571 del self._attrsNS[(n.namespaceURI, n.localName)] 572 del self._attrs[n.nodeName] 573 if 'ownerElement' in n.__dict__: 574 n.__dict__['ownerElement'] = None 575 return n 576 else: 577 raise xml.dom.NotFoundErr() 578 579 def setNamedItem(self, node): 580 if not isinstance(node, Attr): 581 raise xml.dom.HierarchyRequestErr( 582 "%s cannot be child of %s" % (repr(node), repr(self))) 583 old = self._attrs.get(node.name) 584 if old: 585 old.unlink() 586 self._attrs[node.name] = node 587 self._attrsNS[(node.namespaceURI, node.localName)] = node 588 node.ownerElement = self._ownerElement 589 _clear_id_cache(node.ownerElement) 590 return old 591 592 def setNamedItemNS(self, node): 593 return self.setNamedItem(node) 594 595 def __delitem__(self, attname_or_tuple): 596 node = self[attname_or_tuple] 597 _clear_id_cache(node.ownerElement) 598 node.unlink() 599 600 def __getstate__(self): 601 return self._attrs, self._attrsNS, self._ownerElement 602 603 def __setstate__(self, state): 604 self._attrs, self._attrsNS, self._ownerElement = state 605 606 defproperty(NamedNodeMap, "length", 607 doc="Number of nodes in the NamedNodeMap.") 608 609 AttributeList = NamedNodeMap 610 611 612 class TypeInfo(object): 613 __slots__ = 'namespace', 'name' 614 615 def __init__(self, namespace, name): 616 self.namespace = namespace 617 self.name = name 618 619 def __repr__(self): 620 if self.namespace: 621 return "<TypeInfo %r (from %r)>" % (self.name, self.namespace) 622 else: 623 return "<TypeInfo %r>" % self.name 624 625 def _get_name(self): 626 return self.name 627 628 def _get_namespace(self): 629 return self.namespace 630 631 _no_type = TypeInfo(None, None) 632 633 class Element(Node): 634 nodeType = Node.ELEMENT_NODE 635 nodeValue = None 636 schemaType = _no_type 637 638 _magic_id_nodes = 0 639 640 _child_node_types = (Node.ELEMENT_NODE, 641 Node.PROCESSING_INSTRUCTION_NODE, 642 Node.COMMENT_NODE, 643 Node.TEXT_NODE, 644 Node.CDATA_SECTION_NODE, 645 Node.ENTITY_REFERENCE_NODE) 646 647 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 648 localName=None): 649 self.tagName = self.nodeName = tagName 650 self.prefix = prefix 651 self.namespaceURI = namespaceURI 652 self.childNodes = NodeList() 653 654 self._attrs = {} # attributes are double-indexed: 655 self._attrsNS = {} # tagName -> Attribute 656 # URI,localName -> Attribute 657 # in the future: consider lazy generation 658 # of attribute objects this is too tricky 659 # for now because of headaches with 660 # namespaces. 661 662 def _get_localName(self): 663 return self.tagName.split(":", 1)[-1] 664 665 def _get_tagName(self): 666 return self.tagName 667 668 def unlink(self): 669 for attr in self._attrs.values(): 670 attr.unlink() 671 self._attrs = None 672 self._attrsNS = None 673 Node.unlink(self) 674 675 def getAttribute(self, attname): 676 try: 677 return self._attrs[attname].value 678 except KeyError: 679 return "" 680 681 def getAttributeNS(self, namespaceURI, localName): 682 try: 683 return self._attrsNS[(namespaceURI, localName)].value 684 except KeyError: 685 return "" 686 687 def setAttribute(self, attname, value): 688 attr = self.getAttributeNode(attname) 689 if attr is None: 690 attr = Attr(attname) 691 # for performance 692 d = attr.__dict__ 693 d["value"] = d["nodeValue"] = value 694 d["ownerDocument"] = self.ownerDocument 695 self.setAttributeNode(attr) 696 elif value != attr.value: 697 d = attr.__dict__ 698 d["value"] = d["nodeValue"] = value 699 if attr.isId: 700 _clear_id_cache(self) 701 702 def setAttributeNS(self, namespaceURI, qualifiedName, value): 703 prefix, localname = _nssplit(qualifiedName) 704 attr = self.getAttributeNodeNS(namespaceURI, localname) 705 if attr is None: 706 # for performance 707 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 708 d = attr.__dict__ 709 d["prefix"] = prefix 710 d["nodeName"] = qualifiedName 711 d["value"] = d["nodeValue"] = value 712 d["ownerDocument"] = self.ownerDocument 713 self.setAttributeNode(attr) 714 else: 715 d = attr.__dict__ 716 if value != attr.value: 717 d["value"] = d["nodeValue"] = value 718 if attr.isId: 719 _clear_id_cache(self) 720 if attr.prefix != prefix: 721 d["prefix"] = prefix 722 d["nodeName"] = qualifiedName 723 724 def getAttributeNode(self, attrname): 725 return self._attrs.get(attrname) 726 727 def getAttributeNodeNS(self, namespaceURI, localName): 728 return self._attrsNS.get((namespaceURI, localName)) 729 730 def setAttributeNode(self, attr): 731 if attr.ownerElement not in (None, self): 732 raise xml.dom.InuseAttributeErr("attribute node already owned") 733 old1 = self._attrs.get(attr.name, None) 734 if old1 is not None: 735 self.removeAttributeNode(old1) 736 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 737 if old2 is not None and old2 is not old1: 738 self.removeAttributeNode(old2) 739 _set_attribute_node(self, attr) 740 741 if old1 is not attr: 742 # It might have already been part of this node, in which case 743 # it doesn't represent a change, and should not be returned. 744 return old1 745 if old2 is not attr: 746 return old2 747 748 setAttributeNodeNS = setAttributeNode 749 750 def removeAttribute(self, name): 751 try: 752 attr = self._attrs[name] 753 except KeyError: 754 raise xml.dom.NotFoundErr() 755 self.removeAttributeNode(attr) 756 757 def removeAttributeNS(self, namespaceURI, localName): 758 try: 759 attr = self._attrsNS[(namespaceURI, localName)] 760 except KeyError: 761 raise xml.dom.NotFoundErr() 762 self.removeAttributeNode(attr) 763 764 def removeAttributeNode(self, node): 765 if node is None: 766 raise xml.dom.NotFoundErr() 767 try: 768 self._attrs[node.name] 769 except KeyError: 770 raise xml.dom.NotFoundErr() 771 _clear_id_cache(self) 772 node.unlink() 773 # Restore this since the node is still useful and otherwise 774 # unlinked 775 node.ownerDocument = self.ownerDocument 776 777 removeAttributeNodeNS = removeAttributeNode 778 779 def hasAttribute(self, name): 780 return name in self._attrs 781 782 def hasAttributeNS(self, namespaceURI, localName): 783 return (namespaceURI, localName) in self._attrsNS 784 785 def getElementsByTagName(self, name): 786 return _get_elements_by_tagName_helper(self, name, NodeList()) 787 788 def getElementsByTagNameNS(self, namespaceURI, localName): 789 return _get_elements_by_tagName_ns_helper( 790 self, namespaceURI, localName, NodeList()) 791 792 def __repr__(self): 793 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 794 795 def writexml(self, writer, indent="", addindent="", newl=""): 796 # indent = current indentation 797 # addindent = indentation to add to higher levels 798 # newl = newline string 799 writer.write(indent+"<" + self.tagName) 800 801 attrs = self._get_attributes() 802 a_names = attrs.keys() 803 a_names.sort() 804 805 for a_name in a_names: 806 writer.write(" %s=\"" % a_name) 807 _write_data(writer, attrs[a_name].value) 808 writer.write("\"") 809 if self.childNodes: 810 writer.write(">") 811 if (len(self.childNodes) == 1 and 812 self.childNodes[0].nodeType == Node.TEXT_NODE): 813 self.childNodes[0].writexml(writer, '', '', '') 814 else: 815 writer.write(newl) 816 for node in self.childNodes: 817 node.writexml(writer, indent+addindent, addindent, newl) 818 writer.write(indent) 819 writer.write("</%s>%s" % (self.tagName, newl)) 820 else: 821 writer.write("/>%s"%(newl)) 822 823 def _get_attributes(self): 824 return NamedNodeMap(self._attrs, self._attrsNS, self) 825 826 def hasAttributes(self): 827 if self._attrs: 828 return True 829 else: 830 return False 831 832 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 833 834 def setIdAttribute(self, name): 835 idAttr = self.getAttributeNode(name) 836 self.setIdAttributeNode(idAttr) 837 838 def setIdAttributeNS(self, namespaceURI, localName): 839 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 840 self.setIdAttributeNode(idAttr) 841 842 def setIdAttributeNode(self, idAttr): 843 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 844 raise xml.dom.NotFoundErr() 845 if _get_containing_entref(self) is not None: 846 raise xml.dom.NoModificationAllowedErr() 847 if not idAttr._is_id: 848 idAttr.__dict__['_is_id'] = True 849 self._magic_id_nodes += 1 850 self.ownerDocument._magic_id_count += 1 851 _clear_id_cache(self) 852 853 defproperty(Element, "attributes", 854 doc="NamedNodeMap of attributes on the element.") 855 defproperty(Element, "localName", 856 doc="Namespace-local name of this element.") 857 858 859 def _set_attribute_node(element, attr): 860 _clear_id_cache(element) 861 element._attrs[attr.name] = attr 862 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 863 864 # This creates a circular reference, but Element.unlink() 865 # breaks the cycle since the references to the attribute 866 # dictionaries are tossed. 867 attr.__dict__['ownerElement'] = element 868 869 870 class Childless: 871 """Mixin that makes childless-ness easy to implement and avoids 872 the complexity of the Node methods that deal with children. 873 """ 874 875 attributes = None 876 childNodes = EmptyNodeList() 877 firstChild = None 878 lastChild = None 879 880 def _get_firstChild(self): 881 return None 882 883 def _get_lastChild(self): 884 return None 885 886 def appendChild(self, node): 887 raise xml.dom.HierarchyRequestErr( 888 self.nodeName + " nodes cannot have children") 889 890 def hasChildNodes(self): 891 return False 892 893 def insertBefore(self, newChild, refChild): 894 raise xml.dom.HierarchyRequestErr( 895 self.nodeName + " nodes do not have children") 896 897 def removeChild(self, oldChild): 898 raise xml.dom.NotFoundErr( 899 self.nodeName + " nodes do not have children") 900 901 def normalize(self): 902 # For childless nodes, normalize() has nothing to do. 903 pass 904 905 def replaceChild(self, newChild, oldChild): 906 raise xml.dom.HierarchyRequestErr( 907 self.nodeName + " nodes do not have children") 908 909 910 class ProcessingInstruction(Childless, Node): 911 nodeType = Node.PROCESSING_INSTRUCTION_NODE 912 913 def __init__(self, target, data): 914 self.target = self.nodeName = target 915 self.data = self.nodeValue = data 916 917 def _get_data(self): 918 return self.data 919 def _set_data(self, value): 920 d = self.__dict__ 921 d['data'] = d['nodeValue'] = value 922 923 def _get_target(self): 924 return self.target 925 def _set_target(self, value): 926 d = self.__dict__ 927 d['target'] = d['nodeName'] = value 928 929 def __setattr__(self, name, value): 930 if name == "data" or name == "nodeValue": 931 self.__dict__['data'] = self.__dict__['nodeValue'] = value 932 elif name == "target" or name == "nodeName": 933 self.__dict__['target'] = self.__dict__['nodeName'] = value 934 else: 935 self.__dict__[name] = value 936 937 def writexml(self, writer, indent="", addindent="", newl=""): 938 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 939 940 941 class CharacterData(Childless, Node): 942 def _get_length(self): 943 return len(self.data) 944 __len__ = _get_length 945 946 def _get_data(self): 947 return self.__dict__['data'] 948 def _set_data(self, data): 949 d = self.__dict__ 950 d['data'] = d['nodeValue'] = data 951 952 _get_nodeValue = _get_data 953 _set_nodeValue = _set_data 954 955 def __setattr__(self, name, value): 956 if name == "data" or name == "nodeValue": 957 self.__dict__['data'] = self.__dict__['nodeValue'] = value 958 else: 959 self.__dict__[name] = value 960 961 def __repr__(self): 962 data = self.data 963 if len(data) > 10: 964 dotdotdot = "..." 965 else: 966 dotdotdot = "" 967 return '<DOM %s node "%r%s">' % ( 968 self.__class__.__name__, data[0:10], dotdotdot) 969 970 def substringData(self, offset, count): 971 if offset < 0: 972 raise xml.dom.IndexSizeErr("offset cannot be negative") 973 if offset >= len(self.data): 974 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 975 if count < 0: 976 raise xml.dom.IndexSizeErr("count cannot be negative") 977 return self.data[offset:offset+count] 978 979 def appendData(self, arg): 980 self.data = self.data + arg 981 982 def insertData(self, offset, arg): 983 if offset < 0: 984 raise xml.dom.IndexSizeErr("offset cannot be negative") 985 if offset >= len(self.data): 986 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 987 if arg: 988 self.data = "%s%s%s" % ( 989 self.data[:offset], arg, self.data[offset:]) 990 991 def deleteData(self, offset, count): 992 if offset < 0: 993 raise xml.dom.IndexSizeErr("offset cannot be negative") 994 if offset >= len(self.data): 995 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 996 if count < 0: 997 raise xml.dom.IndexSizeErr("count cannot be negative") 998 if count: 999 self.data = self.data[:offset] + self.data[offset+count:] 1000 1001 def replaceData(self, offset, count, arg): 1002 if offset < 0: 1003 raise xml.dom.IndexSizeErr("offset cannot be negative") 1004 if offset >= len(self.data): 1005 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1006 if count < 0: 1007 raise xml.dom.IndexSizeErr("count cannot be negative") 1008 if count: 1009 self.data = "%s%s%s" % ( 1010 self.data[:offset], arg, self.data[offset+count:]) 1011 1012 defproperty(CharacterData, "length", doc="Length of the string data.") 1013 1014 1015 class Text(CharacterData): 1016 # Make sure we don't add an instance __dict__ if we don't already 1017 # have one, at least when that's possible: 1018 # XXX this does not work, CharacterData is an old-style class 1019 # __slots__ = () 1020 1021 nodeType = Node.TEXT_NODE 1022 nodeName = "#text" 1023 attributes = None 1024 1025 def splitText(self, offset): 1026 if offset < 0 or offset > len(self.data): 1027 raise xml.dom.IndexSizeErr("illegal offset value") 1028 newText = self.__class__() 1029 newText.data = self.data[offset:] 1030 newText.ownerDocument = self.ownerDocument 1031 next = self.nextSibling 1032 if self.parentNode and self in self.parentNode.childNodes: 1033 if next is None: 1034 self.parentNode.appendChild(newText) 1035 else: 1036 self.parentNode.insertBefore(newText, next) 1037 self.data = self.data[:offset] 1038 return newText 1039 1040 def writexml(self, writer, indent="", addindent="", newl=""): 1041 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1042 1043 # DOM Level 3 (WD 9 April 2002) 1044 1045 def _get_wholeText(self): 1046 L = [self.data] 1047 n = self.previousSibling 1048 while n is not None: 1049 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1050 L.insert(0, n.data) 1051 n = n.previousSibling 1052 else: 1053 break 1054 n = self.nextSibling 1055 while n is not None: 1056 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1057 L.append(n.data) 1058 n = n.nextSibling 1059 else: 1060 break 1061 return ''.join(L) 1062 1063 def replaceWholeText(self, content): 1064 # XXX This needs to be seriously changed if minidom ever 1065 # supports EntityReference nodes. 1066 parent = self.parentNode 1067 n = self.previousSibling 1068 while n is not None: 1069 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1070 next = n.previousSibling 1071 parent.removeChild(n) 1072 n = next 1073 else: 1074 break 1075 n = self.nextSibling 1076 if not content: 1077 parent.removeChild(self) 1078 while n is not None: 1079 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1080 next = n.nextSibling 1081 parent.removeChild(n) 1082 n = next 1083 else: 1084 break 1085 if content: 1086 d = self.__dict__ 1087 d['data'] = content 1088 d['nodeValue'] = content 1089 return self 1090 else: 1091 return None 1092 1093 def _get_isWhitespaceInElementContent(self): 1094 if self.data.strip(): 1095 return False 1096 elem = _get_containing_element(self) 1097 if elem is None: 1098 return False 1099 info = self.ownerDocument._get_elem_info(elem) 1100 if info is None: 1101 return False 1102 else: 1103 return info.isElementContent() 1104 1105 defproperty(Text, "isWhitespaceInElementContent", 1106 doc="True iff this text node contains only whitespace" 1107 " and is in element content.") 1108 defproperty(Text, "wholeText", 1109 doc="The text of all logically-adjacent text nodes.") 1110 1111 1112 def _get_containing_element(node): 1113 c = node.parentNode 1114 while c is not None: 1115 if c.nodeType == Node.ELEMENT_NODE: 1116 return c 1117 c = c.parentNode 1118 return None 1119 1120 def _get_containing_entref(node): 1121 c = node.parentNode 1122 while c is not None: 1123 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1124 return c 1125 c = c.parentNode 1126 return None 1127 1128 1129 class Comment(Childless, CharacterData): 1130 nodeType = Node.COMMENT_NODE 1131 nodeName = "#comment" 1132 1133 def __init__(self, data): 1134 self.data = self.nodeValue = data 1135 1136 def writexml(self, writer, indent="", addindent="", newl=""): 1137 if "--" in self.data: 1138 raise ValueError("'--' is not allowed in a comment node") 1139 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1140 1141 1142 class CDATASection(Text): 1143 # Make sure we don't add an instance __dict__ if we don't already 1144 # have one, at least when that's possible: 1145 # XXX this does not work, Text is an old-style class 1146 # __slots__ = () 1147 1148 nodeType = Node.CDATA_SECTION_NODE 1149 nodeName = "#cdata-section" 1150 1151 def writexml(self, writer, indent="", addindent="", newl=""): 1152 if self.data.find("]]>") >= 0: 1153 raise ValueError("']]>' not allowed in a CDATA section") 1154 writer.write("<![CDATA[%s]]>" % self.data) 1155 1156 1157 class ReadOnlySequentialNamedNodeMap(object): 1158 __slots__ = '_seq', 1159 1160 def __init__(self, seq=()): 1161 # seq should be a list or tuple 1162 self._seq = seq 1163 1164 def __len__(self): 1165 return len(self._seq) 1166 1167 def _get_length(self): 1168 return len(self._seq) 1169 1170 def getNamedItem(self, name): 1171 for n in self._seq: 1172 if n.nodeName == name: 1173 return n 1174 1175 def getNamedItemNS(self, namespaceURI, localName): 1176 for n in self._seq: 1177 if n.namespaceURI == namespaceURI and n.localName == localName: 1178 return n 1179 1180 def __getitem__(self, name_or_tuple): 1181 if isinstance(name_or_tuple, tuple): 1182 node = self.getNamedItemNS(*name_or_tuple) 1183 else: 1184 node = self.getNamedItem(name_or_tuple) 1185 if node is None: 1186 raise KeyError, name_or_tuple 1187 return node 1188 1189 def item(self, index): 1190 if index < 0: 1191 return None 1192 try: 1193 return self._seq[index] 1194 except IndexError: 1195 return None 1196 1197 def removeNamedItem(self, name): 1198 raise xml.dom.NoModificationAllowedErr( 1199 "NamedNodeMap instance is read-only") 1200 1201 def removeNamedItemNS(self, namespaceURI, localName): 1202 raise xml.dom.NoModificationAllowedErr( 1203 "NamedNodeMap instance is read-only") 1204 1205 def setNamedItem(self, node): 1206 raise xml.dom.NoModificationAllowedErr( 1207 "NamedNodeMap instance is read-only") 1208 1209 def setNamedItemNS(self, node): 1210 raise xml.dom.NoModificationAllowedErr( 1211 "NamedNodeMap instance is read-only") 1212 1213 def __getstate__(self): 1214 return [self._seq] 1215 1216 def __setstate__(self, state): 1217 self._seq = state[0] 1218 1219 defproperty(ReadOnlySequentialNamedNodeMap, "length", 1220 doc="Number of entries in the NamedNodeMap.") 1221 1222 1223 class Identified: 1224 """Mix-in class that supports the publicId and systemId attributes.""" 1225 1226 # XXX this does not work, this is an old-style class 1227 # __slots__ = 'publicId', 'systemId' 1228 1229 def _identified_mixin_init(self, publicId, systemId): 1230 self.publicId = publicId 1231 self.systemId = systemId 1232 1233 def _get_publicId(self): 1234 return self.publicId 1235 1236 def _get_systemId(self): 1237 return self.systemId 1238 1239 class DocumentType(Identified, Childless, Node): 1240 nodeType = Node.DOCUMENT_TYPE_NODE 1241 nodeValue = None 1242 name = None 1243 publicId = None 1244 systemId = None 1245 internalSubset = None 1246 1247 def __init__(self, qualifiedName): 1248 self.entities = ReadOnlySequentialNamedNodeMap() 1249 self.notations = ReadOnlySequentialNamedNodeMap() 1250 if qualifiedName: 1251 prefix, localname = _nssplit(qualifiedName) 1252 self.name = localname 1253 self.nodeName = self.name 1254 1255 def _get_internalSubset(self): 1256 return self.internalSubset 1257 1258 def cloneNode(self, deep): 1259 if self.ownerDocument is None: 1260 # it's ok 1261 clone = DocumentType(None) 1262 clone.name = self.name 1263 clone.nodeName = self.name 1264 operation = xml.dom.UserDataHandler.NODE_CLONED 1265 if deep: 1266 clone.entities._seq = [] 1267 clone.notations._seq = [] 1268 for n in self.notations._seq: 1269 notation = Notation(n.nodeName, n.publicId, n.systemId) 1270 clone.notations._seq.append(notation) 1271 n._call_user_data_handler(operation, n, notation) 1272 for e in self.entities._seq: 1273 entity = Entity(e.nodeName, e.publicId, e.systemId, 1274 e.notationName) 1275 entity.actualEncoding = e.actualEncoding 1276 entity.encoding = e.encoding 1277 entity.version = e.version 1278 clone.entities._seq.append(entity) 1279 e._call_user_data_handler(operation, n, entity) 1280 self._call_user_data_handler(operation, self, clone) 1281 return clone 1282 else: 1283 return None 1284 1285 def writexml(self, writer, indent="", addindent="", newl=""): 1286 writer.write("<!DOCTYPE ") 1287 writer.write(self.name) 1288 if self.publicId: 1289 writer.write("%s PUBLIC '%s'%s '%s'" 1290 % (newl, self.publicId, newl, self.systemId)) 1291 elif self.systemId: 1292 writer.write("%s SYSTEM '%s'" % (newl, self.systemId)) 1293 if self.internalSubset is not None: 1294 writer.write(" [") 1295 writer.write(self.internalSubset) 1296 writer.write("]") 1297 writer.write(">"+newl) 1298 1299 class Entity(Identified, Node): 1300 attributes = None 1301 nodeType = Node.ENTITY_NODE 1302 nodeValue = None 1303 1304 actualEncoding = None 1305 encoding = None 1306 version = None 1307 1308 def __init__(self, name, publicId, systemId, notation): 1309 self.nodeName = name 1310 self.notationName = notation 1311 self.childNodes = NodeList() 1312 self._identified_mixin_init(publicId, systemId) 1313 1314 def _get_actualEncoding(self): 1315 return self.actualEncoding 1316 1317 def _get_encoding(self): 1318 return self.encoding 1319 1320 def _get_version(self): 1321 return self.version 1322 1323 def appendChild(self, newChild): 1324 raise xml.dom.HierarchyRequestErr( 1325 "cannot append children to an entity node") 1326 1327 def insertBefore(self, newChild, refChild): 1328 raise xml.dom.HierarchyRequestErr( 1329 "cannot insert children below an entity node") 1330 1331 def removeChild(self, oldChild): 1332 raise xml.dom.HierarchyRequestErr( 1333 "cannot remove children from an entity node") 1334 1335 def replaceChild(self, newChild, oldChild): 1336 raise xml.dom.HierarchyRequestErr( 1337 "cannot replace children of an entity node") 1338 1339 class Notation(Identified, Childless, Node): 1340 nodeType = Node.NOTATION_NODE 1341 nodeValue = None 1342 1343 def __init__(self, name, publicId, systemId): 1344 self.nodeName = name 1345 self._identified_mixin_init(publicId, systemId) 1346 1347 1348 class DOMImplementation(DOMImplementationLS): 1349 _features = [("core", "1.0"), 1350 ("core", "2.0"), 1351 ("core", None), 1352 ("xml", "1.0"), 1353 ("xml", "2.0"), 1354 ("xml", None), 1355 ("ls-load", "3.0"), 1356 ("ls-load", None), 1357 ] 1358 1359 def hasFeature(self, feature, version): 1360 if version == "": 1361 version = None 1362 return (feature.lower(), version) in self._features 1363 1364 def createDocument(self, namespaceURI, qualifiedName, doctype): 1365 if doctype and doctype.parentNode is not None: 1366 raise xml.dom.WrongDocumentErr( 1367 "doctype object owned by another DOM tree") 1368 doc = self._create_document() 1369 1370 add_root_element = not (namespaceURI is None 1371 and qualifiedName is None 1372 and doctype is None) 1373 1374 if not qualifiedName and add_root_element: 1375 # The spec is unclear what to raise here; SyntaxErr 1376 # would be the other obvious candidate. Since Xerces raises 1377 # InvalidCharacterErr, and since SyntaxErr is not listed 1378 # for createDocument, that seems to be the better choice. 1379 # XXX: need to check for illegal characters here and in 1380 # createElement. 1381 1382 # DOM Level III clears this up when talking about the return value 1383 # of this function. If namespaceURI, qName and DocType are 1384 # Null the document is returned without a document element 1385 # Otherwise if doctype or namespaceURI are not None 1386 # Then we go back to the above problem 1387 raise xml.dom.InvalidCharacterErr("Element with no name") 1388 1389 if add_root_element: 1390 prefix, localname = _nssplit(qualifiedName) 1391 if prefix == "xml" \ 1392 and namespaceURI != "http://www.w3.org/XML/1998/namespace": 1393 raise xml.dom.NamespaceErr("illegal use of 'xml' prefix") 1394 if prefix and not namespaceURI: 1395 raise xml.dom.NamespaceErr( 1396 "illegal use of prefix without namespaces") 1397 element = doc.createElementNS(namespaceURI, qualifiedName) 1398 if doctype: 1399 doc.appendChild(doctype) 1400 doc.appendChild(element) 1401 1402 if doctype: 1403 doctype.parentNode = doctype.ownerDocument = doc 1404 1405 doc.doctype = doctype 1406 doc.implementation = self 1407 return doc 1408 1409 def createDocumentType(self, qualifiedName, publicId, systemId): 1410 doctype = DocumentType(qualifiedName) 1411 doctype.publicId = publicId 1412 doctype.systemId = systemId 1413 return doctype 1414 1415 # DOM Level 3 (WD 9 April 2002) 1416 1417 def getInterface(self, feature): 1418 if self.hasFeature(feature, None): 1419 return self 1420 else: 1421 return None 1422 1423 # internal 1424 def _create_document(self): 1425 return Document() 1426 1427 class ElementInfo(object): 1428 """Object that represents content-model information for an element. 1429 1430 This implementation is not expected to be used in practice; DOM 1431 builders should provide implementations which do the right thing 1432 using information available to it. 1433 1434 """ 1435 1436 __slots__ = 'tagName', 1437 1438 def __init__(self, name): 1439 self.tagName = name 1440 1441 def getAttributeType(self, aname): 1442 return _no_type 1443 1444 def getAttributeTypeNS(self, namespaceURI, localName): 1445 return _no_type 1446 1447 def isElementContent(self): 1448 return False 1449 1450 def isEmpty(self): 1451 """Returns true iff this element is declared to have an EMPTY 1452 content model.""" 1453 return False 1454 1455 def isId(self, aname): 1456 """Returns true iff the named attribute is a DTD-style ID.""" 1457 return False 1458 1459 def isIdNS(self, namespaceURI, localName): 1460 """Returns true iff the identified attribute is a DTD-style ID.""" 1461 return False 1462 1463 def __getstate__(self): 1464 return self.tagName 1465 1466 def __setstate__(self, state): 1467 self.tagName = state 1468 1469 def _clear_id_cache(node): 1470 if node.nodeType == Node.DOCUMENT_NODE: 1471 node._id_cache.clear() 1472 node._id_search_stack = None 1473 elif _in_document(node): 1474 node.ownerDocument._id_cache.clear() 1475 node.ownerDocument._id_search_stack= None 1476 1477 class Document(Node, DocumentLS): 1478 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1479 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1480 1481 nodeType = Node.DOCUMENT_NODE 1482 nodeName = "#document" 1483 nodeValue = None 1484 attributes = None 1485 doctype = None 1486 parentNode = None 1487 previousSibling = nextSibling = None 1488 1489 implementation = DOMImplementation() 1490 1491 # Document attributes from Level 3 (WD 9 April 2002) 1492 1493 actualEncoding = None 1494 encoding = None 1495 standalone = None 1496 version = None 1497 strictErrorChecking = False 1498 errorHandler = None 1499 documentURI = None 1500 1501 _magic_id_count = 0 1502 1503 def __init__(self): 1504 self.childNodes = NodeList() 1505 # mapping of (namespaceURI, localName) -> ElementInfo 1506 # and tagName -> ElementInfo 1507 self._elem_info = {} 1508 self._id_cache = {} 1509 self._id_search_stack = None 1510 1511 def _get_elem_info(self, element): 1512 if element.namespaceURI: 1513 key = element.namespaceURI, element.localName 1514 else: 1515 key = element.tagName 1516 return self._elem_info.get(key) 1517 1518 def _get_actualEncoding(self): 1519 return self.actualEncoding 1520 1521 def _get_doctype(self): 1522 return self.doctype 1523 1524 def _get_documentURI(self): 1525 return self.documentURI 1526 1527 def _get_encoding(self): 1528 return self.encoding 1529 1530 def _get_errorHandler(self): 1531 return self.errorHandler 1532 1533 def _get_standalone(self): 1534 return self.standalone 1535 1536 def _get_strictErrorChecking(self): 1537 return self.strictErrorChecking 1538 1539 def _get_version(self): 1540 return self.version 1541 1542 def appendChild(self, node): 1543 if node.nodeType not in self._child_node_types: 1544 raise xml.dom.HierarchyRequestErr( 1545 "%s cannot be child of %s" % (repr(node), repr(self))) 1546 if node.parentNode is not None: 1547 # This needs to be done before the next test since this 1548 # may *be* the document element, in which case it should 1549 # end up re-ordered to the end. 1550 node.parentNode.removeChild(node) 1551 1552 if node.nodeType == Node.ELEMENT_NODE \ 1553 and self._get_documentElement(): 1554 raise xml.dom.HierarchyRequestErr( 1555 "two document elements disallowed") 1556 return Node.appendChild(self, node) 1557 1558 def removeChild(self, oldChild): 1559 try: 1560 self.childNodes.remove(oldChild) 1561 except ValueError: 1562 raise xml.dom.NotFoundErr() 1563 oldChild.nextSibling = oldChild.previousSibling = None 1564 oldChild.parentNode = None 1565 if self.documentElement is oldChild: 1566 self.documentElement = None 1567 1568 return oldChild 1569 1570 def _get_documentElement(self): 1571 for node in self.childNodes: 1572 if node.nodeType == Node.ELEMENT_NODE: 1573 return node 1574 1575 def unlink(self): 1576 if self.doctype is not None: 1577 self.doctype.unlink() 1578 self.doctype = None 1579 Node.unlink(self) 1580 1581 def cloneNode(self, deep): 1582 if not deep: 1583 return None 1584 clone = self.implementation.createDocument(None, None, None) 1585 clone.encoding = self.encoding 1586 clone.standalone = self.standalone 1587 clone.version = self.version 1588 for n in self.childNodes: 1589 childclone = _clone_node(n, deep, clone) 1590 assert childclone.ownerDocument.isSameNode(clone) 1591 clone.childNodes.append(childclone) 1592 if childclone.nodeType == Node.DOCUMENT_NODE: 1593 assert clone.documentElement is None 1594 elif childclone.nodeType == Node.DOCUMENT_TYPE_NODE: 1595 assert clone.doctype is None 1596 clone.doctype = childclone 1597 childclone.parentNode = clone 1598 self._call_user_data_handler(xml.dom.UserDataHandler.NODE_CLONED, 1599 self, clone) 1600 return clone 1601 1602 def createDocumentFragment(self): 1603 d = DocumentFragment() 1604 d.ownerDocument = self 1605 return d 1606 1607 def createElement(self, tagName): 1608 e = Element(tagName) 1609 e.ownerDocument = self 1610 return e 1611 1612 def createTextNode(self, data): 1613 if not isinstance(data, StringTypes): 1614 raise TypeError, "node contents must be a string" 1615 t = Text() 1616 t.data = data 1617 t.ownerDocument = self 1618 return t 1619 1620 def createCDATASection(self, data): 1621 if not isinstance(data, StringTypes): 1622 raise TypeError, "node contents must be a string" 1623 c = CDATASection() 1624 c.data = data 1625 c.ownerDocument = self 1626 return c 1627 1628 def createComment(self, data): 1629 c = Comment(data) 1630 c.ownerDocument = self 1631 return c 1632 1633 def createProcessingInstruction(self, target, data): 1634 p = ProcessingInstruction(target, data) 1635 p.ownerDocument = self 1636 return p 1637 1638 def createAttribute(self, qName): 1639 a = Attr(qName) 1640 a.ownerDocument = self 1641 a.value = "" 1642 return a 1643 1644 def createElementNS(self, namespaceURI, qualifiedName): 1645 prefix, localName = _nssplit(qualifiedName) 1646 e = Element(qualifiedName, namespaceURI, prefix) 1647 e.ownerDocument = self 1648 return e 1649 1650 def createAttributeNS(self, namespaceURI, qualifiedName): 1651 prefix, localName = _nssplit(qualifiedName) 1652 a = Attr(qualifiedName, namespaceURI, localName, prefix) 1653 a.ownerDocument = self 1654 a.value = "" 1655 return a 1656 1657 # A couple of implementation-specific helpers to create node types 1658 # not supported by the W3C DOM specs: 1659 1660 def _create_entity(self, name, publicId, systemId, notationName): 1661 e = Entity(name, publicId, systemId, notationName) 1662 e.ownerDocument = self 1663 return e 1664 1665 def _create_notation(self, name, publicId, systemId): 1666 n = Notation(name, publicId, systemId) 1667 n.ownerDocument = self 1668 return n 1669 1670 def getElementById(self, id): 1671 if id in self._id_cache: 1672 return self._id_cache[id] 1673 if not (self._elem_info or self._magic_id_count): 1674 return None 1675 1676 stack = self._id_search_stack 1677 if stack is None: 1678 # we never searched before, or the cache has been cleared 1679 stack = [self.documentElement] 1680 self._id_search_stack = stack 1681 elif not stack: 1682 # Previous search was completed and cache is still valid; 1683 # no matching node. 1684 return None 1685 1686 result = None 1687 while stack: 1688 node = stack.pop() 1689 # add child elements to stack for continued searching 1690 stack.extend([child for child in node.childNodes 1691 if child.nodeType in _nodeTypes_with_children]) 1692 # check this node 1693 info = self._get_elem_info(node) 1694 if info: 1695 # We have to process all ID attributes before 1696 # returning in order to get all the attributes set to 1697 # be IDs using Element.setIdAttribute*(). 1698 for attr in node.attributes.values(): 1699 if attr.namespaceURI: 1700 if info.isIdNS(attr.namespaceURI, attr.localName): 1701 self._id_cache[attr.value] = node 1702 if attr.value == id: 1703 result = node 1704 elif not node._magic_id_nodes: 1705 break 1706 elif info.isId(attr.name): 1707 self._id_cache[attr.value] = node 1708 if attr.value == id: 1709 result = node 1710 elif not node._magic_id_nodes: 1711 break 1712 elif attr._is_id: 1713 self._id_cache[attr.value] = node 1714 if attr.value == id: 1715 result = node 1716 elif node._magic_id_nodes == 1: 1717 break 1718 elif node._magic_id_nodes: 1719 for attr in node.attributes.values(): 1720 if attr._is_id: 1721 self._id_cache[attr.value] = node 1722 if attr.value == id: 1723 result = node 1724 if result is not None: 1725 break 1726 return result 1727 1728 def getElementsByTagName(self, name): 1729 return _get_elements_by_tagName_helper(self, name, NodeList()) 1730 1731 def getElementsByTagNameNS(self, namespaceURI, localName): 1732 return _get_elements_by_tagName_ns_helper( 1733 self, namespaceURI, localName, NodeList()) 1734 1735 def isSupported(self, feature, version): 1736 return self.implementation.hasFeature(feature, version) 1737 1738 def importNode(self, node, deep): 1739 if node.nodeType == Node.DOCUMENT_NODE: 1740 raise xml.dom.NotSupportedErr("cannot import document nodes") 1741 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1742 raise xml.dom.NotSupportedErr("cannot import document type nodes") 1743 return _clone_node(node, deep, self) 1744 1745 def writexml(self, writer, indent="", addindent="", newl="", 1746 encoding = None): 1747 if encoding is None: 1748 writer.write('<?xml version="1.0" ?>'+newl) 1749 else: 1750 writer.write('<?xml version="1.0" encoding="%s"?>%s' % (encoding, newl)) 1751 for node in self.childNodes: 1752 node.writexml(writer, indent, addindent, newl) 1753 1754 # DOM Level 3 (WD 9 April 2002) 1755 1756 def renameNode(self, n, namespaceURI, name): 1757 if n.ownerDocument is not self: 1758 raise xml.dom.WrongDocumentErr( 1759 "cannot rename nodes from other documents;\n" 1760 "expected %s,\nfound %s" % (self, n.ownerDocument)) 1761 if n.nodeType not in (Node.ELEMENT_NODE, Node.ATTRIBUTE_NODE): 1762 raise xml.dom.NotSupportedErr( 1763 "renameNode() only applies to element and attribute nodes") 1764 if namespaceURI != EMPTY_NAMESPACE: 1765 if ':' in name: 1766 prefix, localName = name.split(':', 1) 1767 if ( prefix == "xmlns" 1768 and namespaceURI != xml.dom.XMLNS_NAMESPACE): 1769 raise xml.dom.NamespaceErr( 1770 "illegal use of 'xmlns' prefix") 1771 else: 1772 if ( name == "xmlns" 1773 and namespaceURI != xml.dom.XMLNS_NAMESPACE 1774 and n.nodeType == Node.ATTRIBUTE_NODE): 1775 raise xml.dom.NamespaceErr( 1776 "illegal use of the 'xmlns' attribute") 1777 prefix = None 1778 localName = name 1779 else: 1780 prefix = None 1781 localName = None 1782 if n.nodeType == Node.ATTRIBUTE_NODE: 1783 element = n.ownerElement 1784 if element is not None: 1785 is_id = n._is_id 1786 element.removeAttributeNode(n) 1787 else: 1788 element = None 1789 # avoid __setattr__ 1790 d = n.__dict__ 1791 d['prefix'] = prefix 1792 d['localName'] = localName 1793 d['namespaceURI'] = namespaceURI 1794 d['nodeName'] = name 1795 if n.nodeType == Node.ELEMENT_NODE: 1796 d['tagName'] = name 1797 else: 1798 # attribute node 1799 d['name'] = name 1800 if element is not None: 1801 element.setAttributeNode(n) 1802 if is_id: 1803 element.setIdAttributeNode(n) 1804 # It's not clear from a semantic perspective whether we should 1805 # call the user data handlers for the NODE_RENAMED event since 1806 # we're re-using the existing node. The draft spec has been 1807 # interpreted as meaning "no, don't call the handler unless a 1808 # new node is created." 1809 return n 1810 1811 defproperty(Document, "documentElement", 1812 doc="Top-level element of this document.") 1813 1814 1815 def _clone_node(node, deep, newOwnerDocument): 1816 """ 1817 Clone a node and give it the new owner document. 1818 Called by Node.cloneNode and Document.importNode 1819 """ 1820 if node.ownerDocument.isSameNode(newOwnerDocument): 1821 operation = xml.dom.UserDataHandler.NODE_CLONED 1822 else: 1823 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1824 if node.nodeType == Node.ELEMENT_NODE: 1825 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1826 node.nodeName) 1827 for attr in node.attributes.values(): 1828 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1829 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1830 a.specified = attr.specified 1831 1832 if deep: 1833 for child in node.childNodes: 1834 c = _clone_node(child, deep, newOwnerDocument) 1835 clone.appendChild(c) 1836 1837 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1838 clone = newOwnerDocument.createDocumentFragment() 1839 if deep: 1840 for child in node.childNodes: 1841 c = _clone_node(child, deep, newOwnerDocument) 1842 clone.appendChild(c) 1843 1844 elif node.nodeType == Node.TEXT_NODE: 1845 clone = newOwnerDocument.createTextNode(node.data) 1846 elif node.nodeType == Node.CDATA_SECTION_NODE: 1847 clone = newOwnerDocument.createCDATASection(node.data) 1848 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1849 clone = newOwnerDocument.createProcessingInstruction(node.target, 1850 node.data) 1851 elif node.nodeType == Node.COMMENT_NODE: 1852 clone = newOwnerDocument.createComment(node.data) 1853 elif node.nodeType == Node.ATTRIBUTE_NODE: 1854 clone = newOwnerDocument.createAttributeNS(node.namespaceURI, 1855 node.nodeName) 1856 clone.specified = True 1857 clone.value = node.value 1858 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1859 assert node.ownerDocument is not newOwnerDocument 1860 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1861 clone = newOwnerDocument.implementation.createDocumentType( 1862 node.name, node.publicId, node.systemId) 1863 clone.ownerDocument = newOwnerDocument 1864 if deep: 1865 clone.entities._seq = [] 1866 clone.notations._seq = [] 1867 for n in node.notations._seq: 1868 notation = Notation(n.nodeName, n.publicId, n.systemId) 1869 notation.ownerDocument = newOwnerDocument 1870 clone.notations._seq.append(notation) 1871 if hasattr(n, '_call_user_data_handler'): 1872 n._call_user_data_handler(operation, n, notation) 1873 for e in node.entities._seq: 1874 entity = Entity(e.nodeName, e.publicId, e.systemId, 1875 e.notationName) 1876 entity.actualEncoding = e.actualEncoding 1877 entity.encoding = e.encoding 1878 entity.version = e.version 1879 entity.ownerDocument = newOwnerDocument 1880 clone.entities._seq.append(entity) 1881 if hasattr(e, '_call_user_data_handler'): 1882 e._call_user_data_handler(operation, n, entity) 1883 else: 1884 # Note the cloning of Document and DocumentType nodes is 1885 # implementation specific. minidom handles those cases 1886 # directly in the cloneNode() methods. 1887 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1888 1889 # Check for _call_user_data_handler() since this could conceivably 1890 # used with other DOM implementations (one of the FourThought 1891 # DOMs, perhaps?). 1892 if hasattr(node, '_call_user_data_handler'): 1893 node._call_user_data_handler(operation, node, clone) 1894 return clone 1895 1896 1897 def _nssplit(qualifiedName): 1898 fields = qualifiedName.split(':', 1) 1899 if len(fields) == 2: 1900 return fields 1901 else: 1902 return (None, fields[0]) 1903 1904 1905 def _get_StringIO(): 1906 # we can't use cStringIO since it doesn't support Unicode strings 1907 from StringIO import StringIO 1908 return StringIO() 1909 1910 def _do_pulldom_parse(func, args, kwargs): 1911 events = func(*args, **kwargs) 1912 toktype, rootNode = events.getEvent() 1913 events.expandNode(rootNode) 1914 events.clear() 1915 return rootNode 1916 1917 def parse(file, parser=None, bufsize=None): 1918 """Parse a file into a DOM by filename or file object.""" 1919 if parser is None and not bufsize: 1920 from xml.dom import expatbuilder 1921 return expatbuilder.parse(file) 1922 else: 1923 from xml.dom import pulldom 1924 return _do_pulldom_parse(pulldom.parse, (file,), 1925 {'parser': parser, 'bufsize': bufsize}) 1926 1927 def parseString(string, parser=None): 1928 """Parse a file into a DOM from a string.""" 1929 if parser is None: 1930 from xml.dom import expatbuilder 1931 return expatbuilder.parseString(string) 1932 else: 1933 from xml.dom import pulldom 1934 return _do_pulldom_parse(pulldom.parseString, (string,), 1935 {'parser': parser}) 1936 1937 def getDOMImplementation(features=None): 1938 if features: 1939 if isinstance(features, StringTypes): 1940 features = domreg._parse_feature_string(features) 1941 for f, v in features: 1942 if not Document.implementation.hasFeature(f, v): 1943 return None 1944 return Document.implementation 1945