1 """Simple implementation of the Level 1 DOM. 2 3 Namespaces and other minor Level 2 features are also supported. 4 5 parse("foo.xml") 6 7 parseString("<foo><bar/></foo>") 8 9 Todo: 10 ===== 11 * convenience methods for getting elements and text. 12 * more testing 13 * bring some of the writer and linearizer code into conformance with this 14 interface 15 * SAX 2 namespaces 16 """ 17 18 import io 19 import xml.dom 20 21 from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE, domreg 22 from xml.dom.minicompat import * 23 from xml.dom.xmlbuilder import DOMImplementationLS, DocumentLS 24 25 # This is used by the ID-cache invalidation checks; the list isn't 26 # actually complete, since the nodes being checked will never be the 27 # DOCUMENT_NODE or DOCUMENT_FRAGMENT_NODE. (The node being checked is 28 # the node being added or removed, not the node being modified.) 29 # 30 _nodeTypes_with_children = (xml.dom.Node.ELEMENT_NODE, 31 xml.dom.Node.ENTITY_REFERENCE_NODE) 32 33 34 class Node(xml.dom.Node): 35 namespaceURI = None # this is non-null only for elements and attributes 36 parentNode = None 37 ownerDocument = None 38 nextSibling = None 39 previousSibling = None 40 41 prefix = EMPTY_PREFIX # non-null only for NS elements and attributes 42 43 def __bool__(self): 44 return True 45 46 def toxml(self, encoding=None): 47 return self.toprettyxml("", "", encoding) 48 49 def toprettyxml(self, indent="\t", newl="\n", encoding=None): 50 if encoding is None: 51 writer = io.StringIO() 52 else: 53 writer = io.TextIOWrapper(io.BytesIO(), 54 encoding=encoding, 55 errors="xmlcharrefreplace", 56 newline='\n') 57 if self.nodeType == Node.DOCUMENT_NODE: 58 # Can pass encoding only to document, to put it into XML header 59 self.writexml(writer, "", indent, newl, encoding) 60 else: 61 self.writexml(writer, "", indent, newl) 62 if encoding is None: 63 return writer.getvalue() 64 else: 65 return writer.detach().getvalue() 66 67 def hasChildNodes(self): 68 return 
bool(self.childNodes) 69 70 def _get_childNodes(self): 71 return self.childNodes 72 73 def _get_firstChild(self): 74 if self.childNodes: 75 return self.childNodes[0] 76 77 def _get_lastChild(self): 78 if self.childNodes: 79 return self.childNodes[-1] 80 81 def insertBefore(self, newChild, refChild): 82 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 83 for c in tuple(newChild.childNodes): 84 self.insertBefore(c, refChild) 85 ### The DOM does not clearly specify what to return in this case 86 return newChild 87 if newChild.nodeType not in self._child_node_types: 88 raise xml.dom.HierarchyRequestErr( 89 "%s cannot be child of %s" % (repr(newChild), repr(self))) 90 if newChild.parentNode is not None: 91 newChild.parentNode.removeChild(newChild) 92 if refChild is None: 93 self.appendChild(newChild) 94 else: 95 try: 96 index = self.childNodes.index(refChild) 97 except ValueError: 98 raise xml.dom.NotFoundErr() 99 if newChild.nodeType in _nodeTypes_with_children: 100 _clear_id_cache(self) 101 self.childNodes.insert(index, newChild) 102 newChild.nextSibling = refChild 103 refChild.previousSibling = newChild 104 if index: 105 node = self.childNodes[index-1] 106 node.nextSibling = newChild 107 newChild.previousSibling = node 108 else: 109 newChild.previousSibling = None 110 newChild.parentNode = self 111 return newChild 112 113 def appendChild(self, node): 114 if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: 115 for c in tuple(node.childNodes): 116 self.appendChild(c) 117 ### The DOM does not clearly specify what to return in this case 118 return node 119 if node.nodeType not in self._child_node_types: 120 raise xml.dom.HierarchyRequestErr( 121 "%s cannot be child of %s" % (repr(node), repr(self))) 122 elif node.nodeType in _nodeTypes_with_children: 123 _clear_id_cache(self) 124 if node.parentNode is not None: 125 node.parentNode.removeChild(node) 126 _append_child(self, node) 127 node.nextSibling = None 128 return node 129 130 def replaceChild(self, newChild, 
oldChild): 131 if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: 132 refChild = oldChild.nextSibling 133 self.removeChild(oldChild) 134 return self.insertBefore(newChild, refChild) 135 if newChild.nodeType not in self._child_node_types: 136 raise xml.dom.HierarchyRequestErr( 137 "%s cannot be child of %s" % (repr(newChild), repr(self))) 138 if newChild is oldChild: 139 return 140 if newChild.parentNode is not None: 141 newChild.parentNode.removeChild(newChild) 142 try: 143 index = self.childNodes.index(oldChild) 144 except ValueError: 145 raise xml.dom.NotFoundErr() 146 self.childNodes[index] = newChild 147 newChild.parentNode = self 148 oldChild.parentNode = None 149 if (newChild.nodeType in _nodeTypes_with_children 150 or oldChild.nodeType in _nodeTypes_with_children): 151 _clear_id_cache(self) 152 newChild.nextSibling = oldChild.nextSibling 153 newChild.previousSibling = oldChild.previousSibling 154 oldChild.nextSibling = None 155 oldChild.previousSibling = None 156 if newChild.previousSibling: 157 newChild.previousSibling.nextSibling = newChild 158 if newChild.nextSibling: 159 newChild.nextSibling.previousSibling = newChild 160 return oldChild 161 162 def removeChild(self, oldChild): 163 try: 164 self.childNodes.remove(oldChild) 165 except ValueError: 166 raise xml.dom.NotFoundErr() 167 if oldChild.nextSibling is not None: 168 oldChild.nextSibling.previousSibling = oldChild.previousSibling 169 if oldChild.previousSibling is not None: 170 oldChild.previousSibling.nextSibling = oldChild.nextSibling 171 oldChild.nextSibling = oldChild.previousSibling = None 172 if oldChild.nodeType in _nodeTypes_with_children: 173 _clear_id_cache(self) 174 175 oldChild.parentNode = None 176 return oldChild 177 178 def normalize(self): 179 L = [] 180 for child in self.childNodes: 181 if child.nodeType == Node.TEXT_NODE: 182 if not child.data: 183 # empty text node; discard 184 if L: 185 L[-1].nextSibling = child.nextSibling 186 if child.nextSibling: 187 
child.nextSibling.previousSibling = child.previousSibling 188 child.unlink() 189 elif L and L[-1].nodeType == child.nodeType: 190 # collapse text node 191 node = L[-1] 192 node.data = node.data + child.data 193 node.nextSibling = child.nextSibling 194 if child.nextSibling: 195 child.nextSibling.previousSibling = node 196 child.unlink() 197 else: 198 L.append(child) 199 else: 200 L.append(child) 201 if child.nodeType == Node.ELEMENT_NODE: 202 child.normalize() 203 self.childNodes[:] = L 204 205 def cloneNode(self, deep): 206 return _clone_node(self, deep, self.ownerDocument or self) 207 208 def isSupported(self, feature, version): 209 return self.ownerDocument.implementation.hasFeature(feature, version) 210 211 def _get_localName(self): 212 # Overridden in Element and Attr where localName can be Non-Null 213 return None 214 215 # Node interfaces from Level 3 (WD 9 April 2002) 216 217 def isSameNode(self, other): 218 return self is other 219 220 def getInterface(self, feature): 221 if self.isSupported(feature, None): 222 return self 223 else: 224 return None 225 226 # The "user data" functions use a dictionary that is only present 227 # if some user data has been set, so be careful not to assume it 228 # exists. 
229 230 def getUserData(self, key): 231 try: 232 return self._user_data[key][0] 233 except (AttributeError, KeyError): 234 return None 235 236 def setUserData(self, key, data, handler): 237 old = None 238 try: 239 d = self._user_data 240 except AttributeError: 241 d = {} 242 self._user_data = d 243 if key in d: 244 old = d[key][0] 245 if data is None: 246 # ignore handlers passed for None 247 handler = None 248 if old is not None: 249 del d[key] 250 else: 251 d[key] = (data, handler) 252 return old 253 254 def _call_user_data_handler(self, operation, src, dst): 255 if hasattr(self, "_user_data"): 256 for key, (data, handler) in list(self._user_data.items()): 257 if handler is not None: 258 handler.handle(operation, key, data, src, dst) 259 260 # minidom-specific API: 261 262 def unlink(self): 263 self.parentNode = self.ownerDocument = None 264 if self.childNodes: 265 for child in self.childNodes: 266 child.unlink() 267 self.childNodes = NodeList() 268 self.previousSibling = None 269 self.nextSibling = None 270 271 # A Node is its own context manager, to ensure that an unlink() call occurs. 272 # This is similar to how a file object works. 
273 def __enter__(self): 274 return self 275 276 def __exit__(self, et, ev, tb): 277 self.unlink() 278 279 defproperty(Node, "firstChild", doc="First child node, or None.") 280 defproperty(Node, "lastChild", doc="Last child node, or None.") 281 defproperty(Node, "localName", doc="Namespace-local name of this node.") 282 283 284 def _append_child(self, node): 285 # fast path with less checks; usable by DOM builders if careful 286 childNodes = self.childNodes 287 if childNodes: 288 last = childNodes[-1] 289 node.previousSibling = last 290 last.nextSibling = node 291 childNodes.append(node) 292 node.parentNode = self 293 294 def _in_document(node): 295 # return True iff node is part of a document tree 296 while node is not None: 297 if node.nodeType == Node.DOCUMENT_NODE: 298 return True 299 node = node.parentNode 300 return False 301 302 def _write_data(writer, data): 303 "Writes datachars to writer." 304 if data: 305 data = data.replace("&", "&").replace("<", "<"). \ 306 replace("\"", """).replace(">", ">") 307 writer.write(data) 308 309 def _get_elements_by_tagName_helper(parent, name, rc): 310 for node in parent.childNodes: 311 if node.nodeType == Node.ELEMENT_NODE and \ 312 (name == "*" or node.tagName == name): 313 rc.append(node) 314 _get_elements_by_tagName_helper(node, name, rc) 315 return rc 316 317 def _get_elements_by_tagName_ns_helper(parent, nsURI, localName, rc): 318 for node in parent.childNodes: 319 if node.nodeType == Node.ELEMENT_NODE: 320 if ((localName == "*" or node.localName == localName) and 321 (nsURI == "*" or node.namespaceURI == nsURI)): 322 rc.append(node) 323 _get_elements_by_tagName_ns_helper(node, nsURI, localName, rc) 324 return rc 325 326 class DocumentFragment(Node): 327 nodeType = Node.DOCUMENT_FRAGMENT_NODE 328 nodeName = "#document-fragment" 329 nodeValue = None 330 attributes = None 331 parentNode = None 332 _child_node_types = (Node.ELEMENT_NODE, 333 Node.TEXT_NODE, 334 Node.CDATA_SECTION_NODE, 335 Node.ENTITY_REFERENCE_NODE, 
336 Node.PROCESSING_INSTRUCTION_NODE, 337 Node.COMMENT_NODE, 338 Node.NOTATION_NODE) 339 340 def __init__(self): 341 self.childNodes = NodeList() 342 343 344 class Attr(Node): 345 __slots__=('_name', '_value', 'namespaceURI', 346 '_prefix', 'childNodes', '_localName', 'ownerDocument', 'ownerElement') 347 nodeType = Node.ATTRIBUTE_NODE 348 attributes = None 349 specified = False 350 _is_id = False 351 352 _child_node_types = (Node.TEXT_NODE, Node.ENTITY_REFERENCE_NODE) 353 354 def __init__(self, qName, namespaceURI=EMPTY_NAMESPACE, localName=None, 355 prefix=None): 356 self.ownerElement = None 357 self._name = qName 358 self.namespaceURI = namespaceURI 359 self._prefix = prefix 360 self.childNodes = NodeList() 361 362 # Add the single child node that represents the value of the attr 363 self.childNodes.append(Text()) 364 365 # nodeValue and value are set elsewhere 366 367 def _get_localName(self): 368 try: 369 return self._localName 370 except AttributeError: 371 return self.nodeName.split(":", 1)[-1] 372 373 def _get_specified(self): 374 return self.specified 375 376 def _get_name(self): 377 return self._name 378 379 def _set_name(self, value): 380 self._name = value 381 if self.ownerElement is not None: 382 _clear_id_cache(self.ownerElement) 383 384 nodeName = name = property(_get_name, _set_name) 385 386 def _get_value(self): 387 return self._value 388 389 def _set_value(self, value): 390 self._value = value 391 self.childNodes[0].data = value 392 if self.ownerElement is not None: 393 _clear_id_cache(self.ownerElement) 394 self.childNodes[0].data = value 395 396 nodeValue = value = property(_get_value, _set_value) 397 398 def _get_prefix(self): 399 return self._prefix 400 401 def _set_prefix(self, prefix): 402 nsuri = self.namespaceURI 403 if prefix == "xmlns": 404 if nsuri and nsuri != XMLNS_NAMESPACE: 405 raise xml.dom.NamespaceErr( 406 "illegal use of 'xmlns' prefix for the wrong namespace") 407 self._prefix = prefix 408 if prefix is None: 409 newName = 
self.localName 410 else: 411 newName = "%s:%s" % (prefix, self.localName) 412 if self.ownerElement: 413 _clear_id_cache(self.ownerElement) 414 self.name = newName 415 416 prefix = property(_get_prefix, _set_prefix) 417 418 def unlink(self): 419 # This implementation does not call the base implementation 420 # since most of that is not needed, and the expense of the 421 # method call is not warranted. We duplicate the removal of 422 # children, but that's all we needed from the base class. 423 elem = self.ownerElement 424 if elem is not None: 425 del elem._attrs[self.nodeName] 426 del elem._attrsNS[(self.namespaceURI, self.localName)] 427 if self._is_id: 428 self._is_id = False 429 elem._magic_id_nodes -= 1 430 self.ownerDocument._magic_id_count -= 1 431 for child in self.childNodes: 432 child.unlink() 433 del self.childNodes[:] 434 435 def _get_isId(self): 436 if self._is_id: 437 return True 438 doc = self.ownerDocument 439 elem = self.ownerElement 440 if doc is None or elem is None: 441 return False 442 443 info = doc._get_elem_info(elem) 444 if info is None: 445 return False 446 if self.namespaceURI: 447 return info.isIdNS(self.namespaceURI, self.localName) 448 else: 449 return info.isId(self.nodeName) 450 451 def _get_schemaType(self): 452 doc = self.ownerDocument 453 elem = self.ownerElement 454 if doc is None or elem is None: 455 return _no_type 456 457 info = doc._get_elem_info(elem) 458 if info is None: 459 return _no_type 460 if self.namespaceURI: 461 return info.getAttributeTypeNS(self.namespaceURI, self.localName) 462 else: 463 return info.getAttributeType(self.nodeName) 464 465 defproperty(Attr, "isId", doc="True if this attribute is an ID.") 466 defproperty(Attr, "localName", doc="Namespace-local name of this attribute.") 467 defproperty(Attr, "schemaType", doc="Schema type for this attribute.") 468 469 470 class NamedNodeMap(object): 471 """The attribute list is a transient interface to the underlying 472 dictionaries. 
Mutations here will change the underlying element's 473 dictionary. 474 475 Ordering is imposed artificially and does not reflect the order of 476 attributes as found in an input document. 477 """ 478 479 __slots__ = ('_attrs', '_attrsNS', '_ownerElement') 480 481 def __init__(self, attrs, attrsNS, ownerElement): 482 self._attrs = attrs 483 self._attrsNS = attrsNS 484 self._ownerElement = ownerElement 485 486 def _get_length(self): 487 return len(self._attrs) 488 489 def item(self, index): 490 try: 491 return self[list(self._attrs.keys())[index]] 492 except IndexError: 493 return None 494 495 def items(self): 496 L = [] 497 for node in self._attrs.values(): 498 L.append((node.nodeName, node.value)) 499 return L 500 501 def itemsNS(self): 502 L = [] 503 for node in self._attrs.values(): 504 L.append(((node.namespaceURI, node.localName), node.value)) 505 return L 506 507 def __contains__(self, key): 508 if isinstance(key, str): 509 return key in self._attrs 510 else: 511 return key in self._attrsNS 512 513 def keys(self): 514 return self._attrs.keys() 515 516 def keysNS(self): 517 return self._attrsNS.keys() 518 519 def values(self): 520 return self._attrs.values() 521 522 def get(self, name, value=None): 523 return self._attrs.get(name, value) 524 525 __len__ = _get_length 526 527 def _cmp(self, other): 528 if self._attrs is getattr(other, "_attrs", None): 529 return 0 530 else: 531 return (id(self) > id(other)) - (id(self) < id(other)) 532 533 def __eq__(self, other): 534 return self._cmp(other) == 0 535 536 def __ge__(self, other): 537 return self._cmp(other) >= 0 538 539 def __gt__(self, other): 540 return self._cmp(other) > 0 541 542 def __le__(self, other): 543 return self._cmp(other) <= 0 544 545 def __lt__(self, other): 546 return self._cmp(other) < 0 547 548 def __getitem__(self, attname_or_tuple): 549 if isinstance(attname_or_tuple, tuple): 550 return self._attrsNS[attname_or_tuple] 551 else: 552 return self._attrs[attname_or_tuple] 553 554 # same as set 
555 def __setitem__(self, attname, value): 556 if isinstance(value, str): 557 try: 558 node = self._attrs[attname] 559 except KeyError: 560 node = Attr(attname) 561 node.ownerDocument = self._ownerElement.ownerDocument 562 self.setNamedItem(node) 563 node.value = value 564 else: 565 if not isinstance(value, Attr): 566 raise TypeError("value must be a string or Attr object") 567 node = value 568 self.setNamedItem(node) 569 570 def getNamedItem(self, name): 571 try: 572 return self._attrs[name] 573 except KeyError: 574 return None 575 576 def getNamedItemNS(self, namespaceURI, localName): 577 try: 578 return self._attrsNS[(namespaceURI, localName)] 579 except KeyError: 580 return None 581 582 def removeNamedItem(self, name): 583 n = self.getNamedItem(name) 584 if n is not None: 585 _clear_id_cache(self._ownerElement) 586 del self._attrs[n.nodeName] 587 del self._attrsNS[(n.namespaceURI, n.localName)] 588 if hasattr(n, 'ownerElement'): 589 n.ownerElement = None 590 return n 591 else: 592 raise xml.dom.NotFoundErr() 593 594 def removeNamedItemNS(self, namespaceURI, localName): 595 n = self.getNamedItemNS(namespaceURI, localName) 596 if n is not None: 597 _clear_id_cache(self._ownerElement) 598 del self._attrsNS[(n.namespaceURI, n.localName)] 599 del self._attrs[n.nodeName] 600 if hasattr(n, 'ownerElement'): 601 n.ownerElement = None 602 return n 603 else: 604 raise xml.dom.NotFoundErr() 605 606 def setNamedItem(self, node): 607 if not isinstance(node, Attr): 608 raise xml.dom.HierarchyRequestErr( 609 "%s cannot be child of %s" % (repr(node), repr(self))) 610 old = self._attrs.get(node.name) 611 if old: 612 old.unlink() 613 self._attrs[node.name] = node 614 self._attrsNS[(node.namespaceURI, node.localName)] = node 615 node.ownerElement = self._ownerElement 616 _clear_id_cache(node.ownerElement) 617 return old 618 619 def setNamedItemNS(self, node): 620 return self.setNamedItem(node) 621 622 def __delitem__(self, attname_or_tuple): 623 node = self[attname_or_tuple] 624 
_clear_id_cache(node.ownerElement) 625 node.unlink() 626 627 def __getstate__(self): 628 return self._attrs, self._attrsNS, self._ownerElement 629 630 def __setstate__(self, state): 631 self._attrs, self._attrsNS, self._ownerElement = state 632 633 defproperty(NamedNodeMap, "length", 634 doc="Number of nodes in the NamedNodeMap.") 635 636 AttributeList = NamedNodeMap 637 638 639 class TypeInfo(object): 640 __slots__ = 'namespace', 'name' 641 642 def __init__(self, namespace, name): 643 self.namespace = namespace 644 self.name = name 645 646 def __repr__(self): 647 if self.namespace: 648 return "<%s %r (from %r)>" % (self.__class__.__name__, self.name, 649 self.namespace) 650 else: 651 return "<%s %r>" % (self.__class__.__name__, self.name) 652 653 def _get_name(self): 654 return self.name 655 656 def _get_namespace(self): 657 return self.namespace 658 659 _no_type = TypeInfo(None, None) 660 661 class Element(Node): 662 __slots__=('ownerDocument', 'parentNode', 'tagName', 'nodeName', 'prefix', 663 'namespaceURI', '_localName', 'childNodes', '_attrs', '_attrsNS', 664 'nextSibling', 'previousSibling') 665 nodeType = Node.ELEMENT_NODE 666 nodeValue = None 667 schemaType = _no_type 668 669 _magic_id_nodes = 0 670 671 _child_node_types = (Node.ELEMENT_NODE, 672 Node.PROCESSING_INSTRUCTION_NODE, 673 Node.COMMENT_NODE, 674 Node.TEXT_NODE, 675 Node.CDATA_SECTION_NODE, 676 Node.ENTITY_REFERENCE_NODE) 677 678 def __init__(self, tagName, namespaceURI=EMPTY_NAMESPACE, prefix=None, 679 localName=None): 680 self.parentNode = None 681 self.tagName = self.nodeName = tagName 682 self.prefix = prefix 683 self.namespaceURI = namespaceURI 684 self.childNodes = NodeList() 685 self.nextSibling = self.previousSibling = None 686 687 # Attribute dictionaries are lazily created 688 # attributes are double-indexed: 689 # tagName -> Attribute 690 # URI,localName -> Attribute 691 # in the future: consider lazy generation 692 # of attribute objects this is too tricky 693 # for now because of 
headaches with 694 # namespaces. 695 self._attrs = None 696 self._attrsNS = None 697 698 def _ensure_attributes(self): 699 if self._attrs is None: 700 self._attrs = {} 701 self._attrsNS = {} 702 703 def _get_localName(self): 704 try: 705 return self._localName 706 except AttributeError: 707 return self.tagName.split(":", 1)[-1] 708 709 def _get_tagName(self): 710 return self.tagName 711 712 def unlink(self): 713 if self._attrs is not None: 714 for attr in list(self._attrs.values()): 715 attr.unlink() 716 self._attrs = None 717 self._attrsNS = None 718 Node.unlink(self) 719 720 def getAttribute(self, attname): 721 if self._attrs is None: 722 return "" 723 try: 724 return self._attrs[attname].value 725 except KeyError: 726 return "" 727 728 def getAttributeNS(self, namespaceURI, localName): 729 if self._attrsNS is None: 730 return "" 731 try: 732 return self._attrsNS[(namespaceURI, localName)].value 733 except KeyError: 734 return "" 735 736 def setAttribute(self, attname, value): 737 attr = self.getAttributeNode(attname) 738 if attr is None: 739 attr = Attr(attname) 740 attr.value = value # also sets nodeValue 741 attr.ownerDocument = self.ownerDocument 742 self.setAttributeNode(attr) 743 elif value != attr.value: 744 attr.value = value 745 if attr.isId: 746 _clear_id_cache(self) 747 748 def setAttributeNS(self, namespaceURI, qualifiedName, value): 749 prefix, localname = _nssplit(qualifiedName) 750 attr = self.getAttributeNodeNS(namespaceURI, localname) 751 if attr is None: 752 attr = Attr(qualifiedName, namespaceURI, localname, prefix) 753 attr.value = value 754 attr.ownerDocument = self.ownerDocument 755 self.setAttributeNode(attr) 756 else: 757 if value != attr.value: 758 attr.value = value 759 if attr.isId: 760 _clear_id_cache(self) 761 if attr.prefix != prefix: 762 attr.prefix = prefix 763 attr.nodeName = qualifiedName 764 765 def getAttributeNode(self, attrname): 766 if self._attrs is None: 767 return None 768 return self._attrs.get(attrname) 769 770 def 
getAttributeNodeNS(self, namespaceURI, localName): 771 if self._attrsNS is None: 772 return None 773 return self._attrsNS.get((namespaceURI, localName)) 774 775 def setAttributeNode(self, attr): 776 if attr.ownerElement not in (None, self): 777 raise xml.dom.InuseAttributeErr("attribute node already owned") 778 self._ensure_attributes() 779 old1 = self._attrs.get(attr.name, None) 780 if old1 is not None: 781 self.removeAttributeNode(old1) 782 old2 = self._attrsNS.get((attr.namespaceURI, attr.localName), None) 783 if old2 is not None and old2 is not old1: 784 self.removeAttributeNode(old2) 785 _set_attribute_node(self, attr) 786 787 if old1 is not attr: 788 # It might have already been part of this node, in which case 789 # it doesn't represent a change, and should not be returned. 790 return old1 791 if old2 is not attr: 792 return old2 793 794 setAttributeNodeNS = setAttributeNode 795 796 def removeAttribute(self, name): 797 if self._attrsNS is None: 798 raise xml.dom.NotFoundErr() 799 try: 800 attr = self._attrs[name] 801 except KeyError: 802 raise xml.dom.NotFoundErr() 803 self.removeAttributeNode(attr) 804 805 def removeAttributeNS(self, namespaceURI, localName): 806 if self._attrsNS is None: 807 raise xml.dom.NotFoundErr() 808 try: 809 attr = self._attrsNS[(namespaceURI, localName)] 810 except KeyError: 811 raise xml.dom.NotFoundErr() 812 self.removeAttributeNode(attr) 813 814 def removeAttributeNode(self, node): 815 if node is None: 816 raise xml.dom.NotFoundErr() 817 try: 818 self._attrs[node.name] 819 except KeyError: 820 raise xml.dom.NotFoundErr() 821 _clear_id_cache(self) 822 node.unlink() 823 # Restore this since the node is still useful and otherwise 824 # unlinked 825 node.ownerDocument = self.ownerDocument 826 827 removeAttributeNodeNS = removeAttributeNode 828 829 def hasAttribute(self, name): 830 if self._attrs is None: 831 return False 832 return name in self._attrs 833 834 def hasAttributeNS(self, namespaceURI, localName): 835 if self._attrsNS is 
None: 836 return False 837 return (namespaceURI, localName) in self._attrsNS 838 839 def getElementsByTagName(self, name): 840 return _get_elements_by_tagName_helper(self, name, NodeList()) 841 842 def getElementsByTagNameNS(self, namespaceURI, localName): 843 return _get_elements_by_tagName_ns_helper( 844 self, namespaceURI, localName, NodeList()) 845 846 def __repr__(self): 847 return "<DOM Element: %s at %#x>" % (self.tagName, id(self)) 848 849 def writexml(self, writer, indent="", addindent="", newl=""): 850 # indent = current indentation 851 # addindent = indentation to add to higher levels 852 # newl = newline string 853 writer.write(indent+"<" + self.tagName) 854 855 attrs = self._get_attributes() 856 a_names = sorted(attrs.keys()) 857 858 for a_name in a_names: 859 writer.write(" %s=\"" % a_name) 860 _write_data(writer, attrs[a_name].value) 861 writer.write("\"") 862 if self.childNodes: 863 writer.write(">") 864 if (len(self.childNodes) == 1 and 865 self.childNodes[0].nodeType == Node.TEXT_NODE): 866 self.childNodes[0].writexml(writer, '', '', '') 867 else: 868 writer.write(newl) 869 for node in self.childNodes: 870 node.writexml(writer, indent+addindent, addindent, newl) 871 writer.write(indent) 872 writer.write("</%s>%s" % (self.tagName, newl)) 873 else: 874 writer.write("/>%s"%(newl)) 875 876 def _get_attributes(self): 877 self._ensure_attributes() 878 return NamedNodeMap(self._attrs, self._attrsNS, self) 879 880 def hasAttributes(self): 881 if self._attrs: 882 return True 883 else: 884 return False 885 886 # DOM Level 3 attributes, based on the 22 Oct 2002 draft 887 888 def setIdAttribute(self, name): 889 idAttr = self.getAttributeNode(name) 890 self.setIdAttributeNode(idAttr) 891 892 def setIdAttributeNS(self, namespaceURI, localName): 893 idAttr = self.getAttributeNodeNS(namespaceURI, localName) 894 self.setIdAttributeNode(idAttr) 895 896 def setIdAttributeNode(self, idAttr): 897 if idAttr is None or not self.isSameNode(idAttr.ownerElement): 898 raise 
xml.dom.NotFoundErr() 899 if _get_containing_entref(self) is not None: 900 raise xml.dom.NoModificationAllowedErr() 901 if not idAttr._is_id: 902 idAttr._is_id = True 903 self._magic_id_nodes += 1 904 self.ownerDocument._magic_id_count += 1 905 _clear_id_cache(self) 906 907 defproperty(Element, "attributes", 908 doc="NamedNodeMap of attributes on the element.") 909 defproperty(Element, "localName", 910 doc="Namespace-local name of this element.") 911 912 913 def _set_attribute_node(element, attr): 914 _clear_id_cache(element) 915 element._ensure_attributes() 916 element._attrs[attr.name] = attr 917 element._attrsNS[(attr.namespaceURI, attr.localName)] = attr 918 919 # This creates a circular reference, but Element.unlink() 920 # breaks the cycle since the references to the attribute 921 # dictionaries are tossed. 922 attr.ownerElement = element 923 924 class Childless: 925 """Mixin that makes childless-ness easy to implement and avoids 926 the complexity of the Node methods that deal with children. 927 """ 928 __slots__ = () 929 930 attributes = None 931 childNodes = EmptyNodeList() 932 firstChild = None 933 lastChild = None 934 935 def _get_firstChild(self): 936 return None 937 938 def _get_lastChild(self): 939 return None 940 941 def appendChild(self, node): 942 raise xml.dom.HierarchyRequestErr( 943 self.nodeName + " nodes cannot have children") 944 945 def hasChildNodes(self): 946 return False 947 948 def insertBefore(self, newChild, refChild): 949 raise xml.dom.HierarchyRequestErr( 950 self.nodeName + " nodes do not have children") 951 952 def removeChild(self, oldChild): 953 raise xml.dom.NotFoundErr( 954 self.nodeName + " nodes do not have children") 955 956 def normalize(self): 957 # For childless nodes, normalize() has nothing to do. 
958 pass 959 960 def replaceChild(self, newChild, oldChild): 961 raise xml.dom.HierarchyRequestErr( 962 self.nodeName + " nodes do not have children") 963 964 965 class ProcessingInstruction(Childless, Node): 966 nodeType = Node.PROCESSING_INSTRUCTION_NODE 967 __slots__ = ('target', 'data') 968 969 def __init__(self, target, data): 970 self.target = target 971 self.data = data 972 973 # nodeValue is an alias for data 974 def _get_nodeValue(self): 975 return self.data 976 def _set_nodeValue(self, value): 977 self.data = value 978 nodeValue = property(_get_nodeValue, _set_nodeValue) 979 980 # nodeName is an alias for target 981 def _get_nodeName(self): 982 return self.target 983 def _set_nodeName(self, value): 984 self.target = value 985 nodeName = property(_get_nodeName, _set_nodeName) 986 987 def writexml(self, writer, indent="", addindent="", newl=""): 988 writer.write("%s<?%s %s?>%s" % (indent,self.target, self.data, newl)) 989 990 991 class CharacterData(Childless, Node): 992 __slots__=('_data', 'ownerDocument','parentNode', 'previousSibling', 'nextSibling') 993 994 def __init__(self): 995 self.ownerDocument = self.parentNode = None 996 self.previousSibling = self.nextSibling = None 997 self._data = '' 998 Node.__init__(self) 999 1000 def _get_length(self): 1001 return len(self.data) 1002 __len__ = _get_length 1003 1004 def _get_data(self): 1005 return self._data 1006 def _set_data(self, data): 1007 self._data = data 1008 1009 data = nodeValue = property(_get_data, _set_data) 1010 1011 def __repr__(self): 1012 data = self.data 1013 if len(data) > 10: 1014 dotdotdot = "..." 
1015 else: 1016 dotdotdot = "" 1017 return '<DOM %s node "%r%s">' % ( 1018 self.__class__.__name__, data[0:10], dotdotdot) 1019 1020 def substringData(self, offset, count): 1021 if offset < 0: 1022 raise xml.dom.IndexSizeErr("offset cannot be negative") 1023 if offset >= len(self.data): 1024 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1025 if count < 0: 1026 raise xml.dom.IndexSizeErr("count cannot be negative") 1027 return self.data[offset:offset+count] 1028 1029 def appendData(self, arg): 1030 self.data = self.data + arg 1031 1032 def insertData(self, offset, arg): 1033 if offset < 0: 1034 raise xml.dom.IndexSizeErr("offset cannot be negative") 1035 if offset >= len(self.data): 1036 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1037 if arg: 1038 self.data = "%s%s%s" % ( 1039 self.data[:offset], arg, self.data[offset:]) 1040 1041 def deleteData(self, offset, count): 1042 if offset < 0: 1043 raise xml.dom.IndexSizeErr("offset cannot be negative") 1044 if offset >= len(self.data): 1045 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1046 if count < 0: 1047 raise xml.dom.IndexSizeErr("count cannot be negative") 1048 if count: 1049 self.data = self.data[:offset] + self.data[offset+count:] 1050 1051 def replaceData(self, offset, count, arg): 1052 if offset < 0: 1053 raise xml.dom.IndexSizeErr("offset cannot be negative") 1054 if offset >= len(self.data): 1055 raise xml.dom.IndexSizeErr("offset cannot be beyond end of data") 1056 if count < 0: 1057 raise xml.dom.IndexSizeErr("count cannot be negative") 1058 if count: 1059 self.data = "%s%s%s" % ( 1060 self.data[:offset], arg, self.data[offset+count:]) 1061 1062 defproperty(CharacterData, "length", doc="Length of the string data.") 1063 1064 1065 class Text(CharacterData): 1066 __slots__ = () 1067 1068 nodeType = Node.TEXT_NODE 1069 nodeName = "#text" 1070 attributes = None 1071 1072 def splitText(self, offset): 1073 if offset < 0 or offset > len(self.data): 
1074 raise xml.dom.IndexSizeErr("illegal offset value") 1075 newText = self.__class__() 1076 newText.data = self.data[offset:] 1077 newText.ownerDocument = self.ownerDocument 1078 next = self.nextSibling 1079 if self.parentNode and self in self.parentNode.childNodes: 1080 if next is None: 1081 self.parentNode.appendChild(newText) 1082 else: 1083 self.parentNode.insertBefore(newText, next) 1084 self.data = self.data[:offset] 1085 return newText 1086 1087 def writexml(self, writer, indent="", addindent="", newl=""): 1088 _write_data(writer, "%s%s%s" % (indent, self.data, newl)) 1089 1090 # DOM Level 3 (WD 9 April 2002) 1091 1092 def _get_wholeText(self): 1093 L = [self.data] 1094 n = self.previousSibling 1095 while n is not None: 1096 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1097 L.insert(0, n.data) 1098 n = n.previousSibling 1099 else: 1100 break 1101 n = self.nextSibling 1102 while n is not None: 1103 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1104 L.append(n.data) 1105 n = n.nextSibling 1106 else: 1107 break 1108 return ''.join(L) 1109 1110 def replaceWholeText(self, content): 1111 # XXX This needs to be seriously changed if minidom ever 1112 # supports EntityReference nodes. 
1113 parent = self.parentNode 1114 n = self.previousSibling 1115 while n is not None: 1116 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1117 next = n.previousSibling 1118 parent.removeChild(n) 1119 n = next 1120 else: 1121 break 1122 n = self.nextSibling 1123 if not content: 1124 parent.removeChild(self) 1125 while n is not None: 1126 if n.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): 1127 next = n.nextSibling 1128 parent.removeChild(n) 1129 n = next 1130 else: 1131 break 1132 if content: 1133 self.data = content 1134 return self 1135 else: 1136 return None 1137 1138 def _get_isWhitespaceInElementContent(self): 1139 if self.data.strip(): 1140 return False 1141 elem = _get_containing_element(self) 1142 if elem is None: 1143 return False 1144 info = self.ownerDocument._get_elem_info(elem) 1145 if info is None: 1146 return False 1147 else: 1148 return info.isElementContent() 1149 1150 defproperty(Text, "isWhitespaceInElementContent", 1151 doc="True iff this text node contains only whitespace" 1152 " and is in element content.") 1153 defproperty(Text, "wholeText", 1154 doc="The text of all logically-adjacent text nodes.") 1155 1156 1157 def _get_containing_element(node): 1158 c = node.parentNode 1159 while c is not None: 1160 if c.nodeType == Node.ELEMENT_NODE: 1161 return c 1162 c = c.parentNode 1163 return None 1164 1165 def _get_containing_entref(node): 1166 c = node.parentNode 1167 while c is not None: 1168 if c.nodeType == Node.ENTITY_REFERENCE_NODE: 1169 return c 1170 c = c.parentNode 1171 return None 1172 1173 1174 class Comment(CharacterData): 1175 nodeType = Node.COMMENT_NODE 1176 nodeName = "#comment" 1177 1178 def __init__(self, data): 1179 CharacterData.__init__(self) 1180 self._data = data 1181 1182 def writexml(self, writer, indent="", addindent="", newl=""): 1183 if "--" in self.data: 1184 raise ValueError("'--' is not allowed in a comment node") 1185 writer.write("%s<!--%s-->%s" % (indent, self.data, newl)) 1186 1187 1188 class 
class ReadOnlySequentialNamedNodeMap(object):
    """NamedNodeMap look-alike backed by a plain sequence; lookups only.

    Items are located by a linear scan of the underlying sequence, and
    every mutating method raises NoModificationAllowedErr.
    """

    __slots__ = '_seq',

    def __init__(self, seq=()):
        # seq should be a list or tuple
        self._seq = seq

    def __len__(self):
        return len(self._seq)

    def _get_length(self):
        return len(self._seq)

    def getNamedItem(self, name):
        """Return the first item whose nodeName equals ``name``, or None."""
        return next((n for n in self._seq if n.nodeName == name), None)

    def getNamedItemNS(self, namespaceURI, localName):
        """Return the first item matching namespace and local name, or None."""
        matches = (n for n in self._seq
                   if n.namespaceURI == namespaceURI
                   and n.localName == localName)
        return next(matches, None)

    def __getitem__(self, name_or_tuple):
        # A tuple key is a (namespaceURI, localName) pair; anything else
        # is treated as a node name.
        found = (self.getNamedItemNS(*name_or_tuple)
                 if isinstance(name_or_tuple, tuple)
                 else self.getNamedItem(name_or_tuple))
        if found is None:
            raise KeyError(name_or_tuple)
        return found

    def item(self, index):
        """Return the item at ``index``; None if negative or out of range."""
        if index >= 0:
            try:
                return self._seq[index]
            except IndexError:
                pass
        return None

    def _read_only(self):
        # Shared rejection used by all of the mutating methods below.
        raise xml.dom.NoModificationAllowedErr(
            "NamedNodeMap instance is read-only")

    def removeNamedItem(self, name):
        self._read_only()

    def removeNamedItemNS(self, namespaceURI, localName):
        self._read_only()

    def setNamedItem(self, node):
        self._read_only()

    def setNamedItemNS(self, node):
        self._read_only()

    def __getstate__(self):
        return [self._seq]

    def __setstate__(self, state):
        self._seq = state[0]
def cloneNode(self, deep):
    """Return a copy of this document type, or None if it cannot be cloned.

    Only a doctype that is not attached to a document
    (``ownerDocument is None``) is cloned; otherwise None is returned.
    The clone receives this node's ``name``.  When ``deep`` is true the
    notations and entities are copied as well, and each copied node's
    user-data handlers are notified with NODE_CLONED.
    """
    # NOTE(review): publicId, systemId and internalSubset are not copied
    # onto the clone here -- confirm whether that is intentional.
    if self.ownerDocument is not None:
        return None

    clone = DocumentType(None)
    clone.name = self.name
    clone.nodeName = self.name
    operation = xml.dom.UserDataHandler.NODE_CLONED
    if deep:
        clone.entities._seq = []
        clone.notations._seq = []
        for n in self.notations._seq:
            notation = Notation(n.nodeName, n.publicId, n.systemId)
            clone.notations._seq.append(notation)
            n._call_user_data_handler(operation, n, notation)
        for e in self.entities._seq:
            entity = Entity(e.nodeName, e.publicId, e.systemId,
                            e.notationName)
            entity.actualEncoding = e.actualEncoding
            entity.encoding = e.encoding
            entity.version = e.version
            clone.entities._seq.append(entity)
            # The source of this clone is the entity itself.  Passing
            # the notation loop variable here raised UnboundLocalError
            # when there were no notations and reported the wrong source
            # node otherwise.
            e._call_user_data_handler(operation, e, entity)
    self._call_user_data_handler(operation, self, clone)
    return clone
def writexml(self, writer, indent="", addindent="", newl=""):
    """Write this node as a ``<!DOCTYPE ...>`` declaration to ``writer``.

    The external identifier (PUBLIC or SYSTEM) and the internal subset
    are emitted only when present.  ``newl`` is used both as the
    separator inside the external identifier and as the terminator after
    the closing ``>``; ``indent`` and ``addindent`` are accepted but not
    used.
    """
    emit = writer.write
    emit("<!DOCTYPE ")
    emit(self.name)
    if self.publicId:
        external_id = "%s PUBLIC '%s'%s '%s'" % (
            newl, self.publicId, newl, self.systemId)
        emit(external_id)
    elif self.systemId:
        emit("%s SYSTEM '%s'" % (newl, self.systemId))
    subset = self.internalSubset
    if subset is not None:
        emit(" [")
        emit(subset)
        emit("]")
    emit(">" + newl)
def hasFeature(self, feature, version):
    """Return True if ``(feature, version)`` is listed in ``_features``.

    The feature name is matched case-insensitively, and an empty version
    string is treated the same as None.
    """
    wanted_version = None if version == "" else version
    return (feature.lower(), wanted_version) in self._features
def getInterface(self, feature):
    """Return this object if it supports ``feature`` (any version), else None."""
    return self if self.hasFeature(feature, None) else None
1475 1476 """ 1477 1478 __slots__ = 'tagName', 1479 1480 def __init__(self, name): 1481 self.tagName = name 1482 1483 def getAttributeType(self, aname): 1484 return _no_type 1485 1486 def getAttributeTypeNS(self, namespaceURI, localName): 1487 return _no_type 1488 1489 def isElementContent(self): 1490 return False 1491 1492 def isEmpty(self): 1493 """Returns true iff this element is declared to have an EMPTY 1494 content model.""" 1495 return False 1496 1497 def isId(self, aname): 1498 """Returns true iff the named attribute is a DTD-style ID.""" 1499 return False 1500 1501 def isIdNS(self, namespaceURI, localName): 1502 """Returns true iff the identified attribute is a DTD-style ID.""" 1503 return False 1504 1505 def __getstate__(self): 1506 return self.tagName 1507 1508 def __setstate__(self, state): 1509 self.tagName = state 1510 1511 def _clear_id_cache(node): 1512 if node.nodeType == Node.DOCUMENT_NODE: 1513 node._id_cache.clear() 1514 node._id_search_stack = None 1515 elif _in_document(node): 1516 node.ownerDocument._id_cache.clear() 1517 node.ownerDocument._id_search_stack= None 1518 1519 class Document(Node, DocumentLS): 1520 __slots__ = ('_elem_info', 'doctype', 1521 '_id_search_stack', 'childNodes', '_id_cache') 1522 _child_node_types = (Node.ELEMENT_NODE, Node.PROCESSING_INSTRUCTION_NODE, 1523 Node.COMMENT_NODE, Node.DOCUMENT_TYPE_NODE) 1524 1525 implementation = DOMImplementation() 1526 nodeType = Node.DOCUMENT_NODE 1527 nodeName = "#document" 1528 nodeValue = None 1529 attributes = None 1530 parentNode = None 1531 previousSibling = nextSibling = None 1532 1533 1534 # Document attributes from Level 3 (WD 9 April 2002) 1535 1536 actualEncoding = None 1537 encoding = None 1538 standalone = None 1539 version = None 1540 strictErrorChecking = False 1541 errorHandler = None 1542 documentURI = None 1543 1544 _magic_id_count = 0 1545 1546 def __init__(self): 1547 self.doctype = None 1548 self.childNodes = NodeList() 1549 # mapping of (namespaceURI, localName) 
-> ElementInfo 1550 # and tagName -> ElementInfo 1551 self._elem_info = {} 1552 self._id_cache = {} 1553 self._id_search_stack = None 1554 1555 def _get_elem_info(self, element): 1556 if element.namespaceURI: 1557 key = element.namespaceURI, element.localName 1558 else: 1559 key = element.tagName 1560 return self._elem_info.get(key) 1561 1562 def _get_actualEncoding(self): 1563 return self.actualEncoding 1564 1565 def _get_doctype(self): 1566 return self.doctype 1567 1568 def _get_documentURI(self): 1569 return self.documentURI 1570 1571 def _get_encoding(self): 1572 return self.encoding 1573 1574 def _get_errorHandler(self): 1575 return self.errorHandler 1576 1577 def _get_standalone(self): 1578 return self.standalone 1579 1580 def _get_strictErrorChecking(self): 1581 return self.strictErrorChecking 1582 1583 def _get_version(self): 1584 return self.version 1585 1586 def appendChild(self, node): 1587 if node.nodeType not in self._child_node_types: 1588 raise xml.dom.HierarchyRequestErr( 1589 "%s cannot be child of %s" % (repr(node), repr(self))) 1590 if node.parentNode is not None: 1591 # This needs to be done before the next test since this 1592 # may *be* the document element, in which case it should 1593 # end up re-ordered to the end. 
def removeChild(self, oldChild):
    """Detach ``oldChild`` from this document and return it.

    The removed node's former neighbours are linked to each other, and
    the node's own parent and sibling references are cleared.

    Raises:
        xml.dom.NotFoundErr: if ``oldChild`` is not a child of this
            document.
    """
    try:
        self.childNodes.remove(oldChild)
    except ValueError:
        raise xml.dom.NotFoundErr()
    # Close the gap in the sibling chain; without this the neighbours
    # would keep pointing at a node that is no longer in the document.
    following = oldChild.nextSibling
    preceding = oldChild.previousSibling
    if following is not None:
        following.previousSibling = preceding
    if preceding is not None:
        preceding.nextSibling = following
    oldChild.nextSibling = oldChild.previousSibling = None
    oldChild.parentNode = None
    # No documentElement bookkeeping is needed here: the getter derives
    # the document element by scanning childNodes, so it already reflects
    # the removal.
    return oldChild
def getElementById(self, id):
    """Return the element that has an ID attribute equal to ``id``, or None.

    Results are memoised in ``self._id_cache`` (ID value -> element).
    The tree walk is incremental: the stack of nodes still to be visited
    is kept in ``self._id_search_stack``, so a later call resumes where
    an earlier one stopped instead of starting over.  A stack of None
    means no search is in progress; an empty stack means the whole tree
    has already been scanned.

    An attribute counts as an ID when the element's ElementInfo says so
    (``isId`` / ``isIdNS``) or when the attribute's ``_is_id`` flag is
    set.
    """
    # Fast path: this ID was already seen during an earlier walk.
    if id in self._id_cache:
        return self._id_cache[id]
    # Without element info and without any "magic" ID attributes there
    # is nothing that could mark an attribute as an ID.
    # NOTE(review): _magic_id_count presumably counts attributes flagged
    # via Element.setIdAttribute*() -- confirm against Element.
    if not (self._elem_info or self._magic_id_count):
        return None

    stack = self._id_search_stack
    if stack is None:
        # we never searched before, or the cache has been cleared
        # NOTE(review): if the document has no document element this
        # pushes None, and the loop below would fail on node.childNodes
        # -- confirm whether that state is reachable.
        stack = [self.documentElement]
        self._id_search_stack = stack
    elif not stack:
        # Previous search was completed and cache is still valid;
        # no matching node.
        return None

    result = None
    while stack:
        node = stack.pop()
        # add child elements to stack for continued searching
        stack.extend([child for child in node.childNodes
                      if child.nodeType in _nodeTypes_with_children])
        # check this node
        info = self._get_elem_info(node)
        if info:
            # We have to process all ID attributes before
            # returning in order to get all the attributes set to
            # be IDs using Element.setIdAttribute*().
            for attr in node.attributes.values():
                if attr.namespaceURI:
                    if info.isIdNS(attr.namespaceURI, attr.localName):
                        self._id_cache[attr.value] = node
                        if attr.value == id:
                            result = node
                        elif not node._magic_id_nodes:
                            # Declared ID did not match and the node has
                            # no magic ID attributes: stop scanning it.
                            break
                elif info.isId(attr.name):
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
                    elif not node._magic_id_nodes:
                        break
                elif attr._is_id:
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
                    elif node._magic_id_nodes == 1:
                        # The only magic ID attribute on this node has
                        # been seen and did not match.
                        break
        elif node._magic_id_nodes:
            # No element info for this node: only attributes explicitly
            # flagged as IDs can match.
            for attr in node.attributes.values():
                if attr._is_id:
                    self._id_cache[attr.value] = node
                    if attr.value == id:
                        result = node
        if result is not None:
            # Leave the unvisited nodes on the stack so the next call
            # can continue from here.
            break
    return result
def writexml(self, writer, indent="", addindent="", newl="", encoding=None):
    """Write the XML declaration followed by every child node.

    When ``encoding`` is given it is recorded in the declaration.  Each
    child is then asked to write itself with the same ``indent``,
    ``addindent`` and ``newl`` arguments.
    """
    if encoding is not None:
        declaration = '<?xml version="1.0" encoding="%s"?>%s' % (
            encoding, newl)
    else:
        declaration = '<?xml version="1.0" ?>'+newl
    writer.write(declaration)
    for child in self.childNodes:
        child.writexml(writer, indent, addindent, newl)
whether we should 1847 # call the user data handlers for the NODE_RENAMED event since 1848 # we're re-using the existing node. The draft spec has been 1849 # interpreted as meaning "no, don't call the handler unless a 1850 # new node is created." 1851 return n 1852 1853 defproperty(Document, "documentElement", 1854 doc="Top-level element of this document.") 1855 1856 1857 def _clone_node(node, deep, newOwnerDocument): 1858 """ 1859 Clone a node and give it the new owner document. 1860 Called by Node.cloneNode and Document.importNode 1861 """ 1862 if node.ownerDocument.isSameNode(newOwnerDocument): 1863 operation = xml.dom.UserDataHandler.NODE_CLONED 1864 else: 1865 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1866 if node.nodeType == Node.ELEMENT_NODE: 1867 clone = newOwnerDocument.createElementNS(node.namespaceURI, 1868 node.nodeName) 1869 for attr in node.attributes.values(): 1870 clone.setAttributeNS(attr.namespaceURI, attr.nodeName, attr.value) 1871 a = clone.getAttributeNodeNS(attr.namespaceURI, attr.localName) 1872 a.specified = attr.specified 1873 1874 if deep: 1875 for child in node.childNodes: 1876 c = _clone_node(child, deep, newOwnerDocument) 1877 clone.appendChild(c) 1878 1879 elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: 1880 clone = newOwnerDocument.createDocumentFragment() 1881 if deep: 1882 for child in node.childNodes: 1883 c = _clone_node(child, deep, newOwnerDocument) 1884 clone.appendChild(c) 1885 1886 elif node.nodeType == Node.TEXT_NODE: 1887 clone = newOwnerDocument.createTextNode(node.data) 1888 elif node.nodeType == Node.CDATA_SECTION_NODE: 1889 clone = newOwnerDocument.createCDATASection(node.data) 1890 elif node.nodeType == Node.PROCESSING_INSTRUCTION_NODE: 1891 clone = newOwnerDocument.createProcessingInstruction(node.target, 1892 node.data) 1893 elif node.nodeType == Node.COMMENT_NODE: 1894 clone = newOwnerDocument.createComment(node.data) 1895 elif node.nodeType == Node.ATTRIBUTE_NODE: 1896 clone = 
newOwnerDocument.createAttributeNS(node.namespaceURI, 1897 node.nodeName) 1898 clone.specified = True 1899 clone.value = node.value 1900 elif node.nodeType == Node.DOCUMENT_TYPE_NODE: 1901 assert node.ownerDocument is not newOwnerDocument 1902 operation = xml.dom.UserDataHandler.NODE_IMPORTED 1903 clone = newOwnerDocument.implementation.createDocumentType( 1904 node.name, node.publicId, node.systemId) 1905 clone.ownerDocument = newOwnerDocument 1906 if deep: 1907 clone.entities._seq = [] 1908 clone.notations._seq = [] 1909 for n in node.notations._seq: 1910 notation = Notation(n.nodeName, n.publicId, n.systemId) 1911 notation.ownerDocument = newOwnerDocument 1912 clone.notations._seq.append(notation) 1913 if hasattr(n, '_call_user_data_handler'): 1914 n._call_user_data_handler(operation, n, notation) 1915 for e in node.entities._seq: 1916 entity = Entity(e.nodeName, e.publicId, e.systemId, 1917 e.notationName) 1918 entity.actualEncoding = e.actualEncoding 1919 entity.encoding = e.encoding 1920 entity.version = e.version 1921 entity.ownerDocument = newOwnerDocument 1922 clone.entities._seq.append(entity) 1923 if hasattr(e, '_call_user_data_handler'): 1924 e._call_user_data_handler(operation, n, entity) 1925 else: 1926 # Note the cloning of Document and DocumentType nodes is 1927 # implementation specific. minidom handles those cases 1928 # directly in the cloneNode() methods. 1929 raise xml.dom.NotSupportedErr("Cannot clone node %s" % repr(node)) 1930 1931 # Check for _call_user_data_handler() since this could conceivably 1932 # used with other DOM implementations (one of the FourThought 1933 # DOMs, perhaps?). 
1934 if hasattr(node, '_call_user_data_handler'): 1935 node._call_user_data_handler(operation, node, clone) 1936 return clone 1937 1938 1939 def _nssplit(qualifiedName): 1940 fields = qualifiedName.split(':', 1) 1941 if len(fields) == 2: 1942 return fields 1943 else: 1944 return (None, fields[0]) 1945 1946 1947 def _do_pulldom_parse(func, args, kwargs): 1948 events = func(*args, **kwargs) 1949 toktype, rootNode = events.getEvent() 1950 events.expandNode(rootNode) 1951 events.clear() 1952 return rootNode 1953 1954 def parse(file, parser=None, bufsize=None): 1955 """Parse a file into a DOM by filename or file object.""" 1956 if parser is None and not bufsize: 1957 from xml.dom import expatbuilder 1958 return expatbuilder.parse(file) 1959 else: 1960 from xml.dom import pulldom 1961 return _do_pulldom_parse(pulldom.parse, (file,), 1962 {'parser': parser, 'bufsize': bufsize}) 1963 1964 def parseString(string, parser=None): 1965 """Parse a file into a DOM from a string.""" 1966 if parser is None: 1967 from xml.dom import expatbuilder 1968 return expatbuilder.parseString(string) 1969 else: 1970 from xml.dom import pulldom 1971 return _do_pulldom_parse(pulldom.parseString, (string,), 1972 {'parser': parser}) 1973 1974 def getDOMImplementation(features=None): 1975 if features: 1976 if isinstance(features, str): 1977 features = domreg._parse_feature_string(features) 1978 for f, v in features: 1979 if not Document.implementation.hasFeature(f, v): 1980 return None 1981 return Document.implementation 1982