1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: DTM.java 468653 2006-10-28 07:07:05Z minchau $ 20 */ 21 package org.apache.xml.dtm; 22 23 import javax.xml.transform.SourceLocator; 24 25 import org.apache.xml.utils.XMLString; 26 27 /** 28 * <code>DTM</code> is an XML document model expressed as a table 29 * rather than an object tree. It attempts to provide an interface to 30 * a parse tree that has very little object creation. (DTM 31 * implementations may also support incremental construction of the 32 * model, but that's hidden from the DTM API.) 33 * 34 * <p>Nodes in the DTM are identified by integer "handles". A handle must 35 * be unique within a process, and carries both node identification and 36 * document identification. It must be possible to compare two handles 37 * (and thus their nodes) for identity with "==".</p> 38 * 39 * <p>Namespace URLs, local-names, and expanded-names can all be 40 * represented by and tested as integer ID values. An expanded name 41 * represents (and may or may not directly contain) a combination of 42 * the URL ID, and the local-name ID. Note that the namespace URL id 43 * can be 0, which should have the meaning that the namespace is null. 
44 * For consistancy, zero should not be used for a local-name index. </p> 45 * 46 * <p>Text content of a node is represented by an index and length, 47 * permitting efficient storage such as a shared FastStringBuffer.</p> 48 * 49 * <p>The model of the tree, as well as the general navigation model, 50 * is that of XPath 1.0, for the moment. The model will eventually be 51 * adapted to match the XPath 2.0 data model, XML Schema, and 52 * InfoSet.</p> 53 * 54 * <p>DTM does _not_ directly support the W3C's Document Object 55 * Model. However, it attempts to come close enough that an 56 * implementation of DTM can be created that wraps a DOM and vice 57 * versa.</p> 58 * 59 * <p><strong>Please Note:</strong> The DTM API is still 60 * <strong>Subject To Change.</strong> This wouldn't affect most 61 * users, but might require updating some extensions.</p> 62 * 63 * <p> The largest change being contemplated is a reconsideration of 64 * the Node Handle representation. We are still not entirely sure 65 * that an integer packed with two numeric subfields is really the 66 * best solution. It has been suggested that we move up to a Long, to 67 * permit more nodes per document without having to reduce the number 68 * of slots in the DTMManager. There's even been a proposal that we 69 * replace these integers with "cursor" objects containing the 70 * internal node id and a pointer to the actual DTM object; this might 71 * reduce the need to continuously consult the DTMManager to retrieve 72 * the latter, and might provide a useful "hook" back into normal Java 73 * heap management. But changing this datatype would have huge impact 74 * on Xalan's internals -- especially given Java's lack of C-style 75 * typedefs -- so we won't cut over unless we're convinced the new 76 * solution really would be an improvement!</p> 77 * */ 78 public interface DTM 79 { 80 81 /** 82 * Null node handles are represented by this value. 
83 */ 84 public static final int NULL = -1; 85 86 // These nodeType mnemonics and values are deliberately the same as those 87 // used by the DOM, for convenient mapping 88 // 89 // %REVIEW% Should we actually define these as initialized to, 90 // eg. org.w3c.dom.Document.ELEMENT_NODE? 91 92 /** 93 * The node is a <code>Root</code>. 94 */ 95 public static final short ROOT_NODE = 0; 96 97 /** 98 * The node is an <code>Element</code>. 99 */ 100 public static final short ELEMENT_NODE = 1; 101 102 /** 103 * The node is an <code>Attr</code>. 104 */ 105 public static final short ATTRIBUTE_NODE = 2; 106 107 /** 108 * The node is a <code>Text</code> node. 109 */ 110 public static final short TEXT_NODE = 3; 111 112 /** 113 * The node is a <code>CDATASection</code>. 114 */ 115 public static final short CDATA_SECTION_NODE = 4; 116 117 /** 118 * The node is an <code>EntityReference</code>. 119 */ 120 public static final short ENTITY_REFERENCE_NODE = 5; 121 122 /** 123 * The node is an <code>Entity</code>. 124 */ 125 public static final short ENTITY_NODE = 6; 126 127 /** 128 * The node is a <code>ProcessingInstruction</code>. 129 */ 130 public static final short PROCESSING_INSTRUCTION_NODE = 7; 131 132 /** 133 * The node is a <code>Comment</code>. 134 */ 135 public static final short COMMENT_NODE = 8; 136 137 /** 138 * The node is a <code>Document</code>. 139 */ 140 public static final short DOCUMENT_NODE = 9; 141 142 /** 143 * The node is a <code>DocumentType</code>. 144 */ 145 public static final short DOCUMENT_TYPE_NODE = 10; 146 147 /** 148 * The node is a <code>DocumentFragment</code>. 149 */ 150 public static final short DOCUMENT_FRAGMENT_NODE = 11; 151 152 /** 153 * The node is a <code>Notation</code>. 154 */ 155 public static final short NOTATION_NODE = 12; 156 157 /** 158 * The node is a <code>namespace node</code>. Note that this is not 159 * currently a node type defined by the DOM API. 
160 */ 161 public static final short NAMESPACE_NODE = 13; 162 163 /** 164 * The number of valid nodetypes. 165 */ 166 public static final short NTYPES = 14; 167 168 // ========= DTM Implementation Control Functions. ============== 169 // %TBD% RETIRED -- do via setFeature if needed. Remove from impls. 170 // public void setParseBlockSize(int blockSizeSuggestion); 171 172 /** 173 * Set an implementation dependent feature. 174 * <p> 175 * %REVIEW% Do we really expect to set features on DTMs? 176 * 177 * @param featureId A feature URL. 178 * @param state true if this feature should be on, false otherwise. 179 */ 180 public void setFeature(String featureId, boolean state); 181 182 /** 183 * Set a run time property for this DTM instance. 184 * 185 * @param property a <code>String</code> value 186 * @param value an <code>Object</code> value 187 */ 188 public void setProperty(String property, Object value); 189 190 // ========= Document Navigation Functions ========= 191 192 /** 193 * This returns a stateless "traverser", that can navigate over an 194 * XPath axis, though not in document order. 195 * 196 * @param axis One of Axes.ANCESTORORSELF, etc. 197 * 198 * @return A DTMAxisIterator, or null if the givin axis isn't supported. 199 */ 200 public DTMAxisTraverser getAxisTraverser(final int axis); 201 202 /** 203 * This is a shortcut to the iterators that implement 204 * XPath axes. 205 * Returns a bare-bones iterator that must be initialized 206 * with a start node (using iterator.setStartNode()). 207 * 208 * @param axis One of Axes.ANCESTORORSELF, etc. 209 * 210 * @return A DTMAxisIterator, or null if the givin axis isn't supported. 211 */ 212 public DTMAxisIterator getAxisIterator(final int axis); 213 214 /** 215 * Get an iterator that can navigate over an XPath Axis, predicated by 216 * the extended type ID. 217 * 218 * @param axis 219 * @param type An extended type ID. 220 * 221 * @return A DTMAxisIterator, or null if the givin axis isn't supported. 
222 */ 223 public DTMAxisIterator getTypedAxisIterator(final int axis, final int type); 224 225 /** 226 * Given a node handle, test if it has child nodes. 227 * <p> %REVIEW% This is obviously useful at the DOM layer, where it 228 * would permit testing this without having to create a proxy 229 * node. It's less useful in the DTM API, where 230 * (dtm.getFirstChild(nodeHandle)!=DTM.NULL) is just as fast and 231 * almost as self-evident. But it's a convenience, and eases porting 232 * of DOM code to DTM. </p> 233 * 234 * @param nodeHandle int Handle of the node. 235 * @return int true if the given node has child nodes. 236 */ 237 public boolean hasChildNodes(int nodeHandle); 238 239 /** 240 * Given a node handle, get the handle of the node's first child. 241 * 242 * @param nodeHandle int Handle of the node. 243 * @return int DTM node-number of first child, 244 * or DTM.NULL to indicate none exists. 245 */ 246 public int getFirstChild(int nodeHandle); 247 248 /** 249 * Given a node handle, get the handle of the node's last child. 250 * 251 * @param nodeHandle int Handle of the node. 252 * @return int Node-number of last child, 253 * or DTM.NULL to indicate none exists. 254 */ 255 public int getLastChild(int nodeHandle); 256 257 /** 258 * Retrieves an attribute node by local name and namespace URI 259 * 260 * %TBD% Note that we currently have no way to support 261 * the DOM's old getAttribute() call, which accesses only the qname. 262 * 263 * @param elementHandle Handle of the node upon which to look up this attribute. 264 * @param namespaceURI The namespace URI of the attribute to 265 * retrieve, or null. 266 * @param name The local name of the attribute to 267 * retrieve. 268 * @return The attribute node handle with the specified name ( 269 * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such 270 * attribute. 
271 */ 272 public int getAttributeNode(int elementHandle, String namespaceURI, 273 String name); 274 275 /** 276 * Given a node handle, get the index of the node's first attribute. 277 * 278 * @param nodeHandle int Handle of the node. 279 * @return Handle of first attribute, or DTM.NULL to indicate none exists. 280 */ 281 public int getFirstAttribute(int nodeHandle); 282 283 /** 284 * Given a node handle, get the index of the node's first namespace node. 285 * 286 * @param nodeHandle handle to node, which should probably be an element 287 * node, but need not be. 288 * 289 * @param inScope true if all namespaces in scope should be 290 * returned, false if only the node's own 291 * namespace declarations should be returned. 292 * @return handle of first namespace, 293 * or DTM.NULL to indicate none exists. 294 */ 295 public int getFirstNamespaceNode(int nodeHandle, boolean inScope); 296 297 /** 298 * Given a node handle, advance to its next sibling. 299 * @param nodeHandle int Handle of the node. 300 * @return int Node-number of next sibling, 301 * or DTM.NULL to indicate none exists. 302 */ 303 public int getNextSibling(int nodeHandle); 304 305 /** 306 * Given a node handle, find its preceeding sibling. 307 * WARNING: DTM implementations may be asymmetric; in some, 308 * this operation has been resolved by search, and is relatively expensive. 309 * 310 * @param nodeHandle the id of the node. 311 * @return int Node-number of the previous sib, 312 * or DTM.NULL to indicate none exists. 313 */ 314 public int getPreviousSibling(int nodeHandle); 315 316 /** 317 * Given a node handle, advance to the next attribute. If an 318 * element, we advance to its first attribute; if an attr, we advance to 319 * the next attr of the same element. 320 * 321 * @param nodeHandle int Handle of the node. 322 * @return int DTM node-number of the resolved attr, 323 * or DTM.NULL to indicate none exists. 
324 */ 325 public int getNextAttribute(int nodeHandle); 326 327 /** 328 * Given a namespace handle, advance to the next namespace in the same scope 329 * (local or local-plus-inherited, as selected by getFirstNamespaceNode) 330 * 331 * @param baseHandle handle to original node from where the first child 332 * was relative to (needed to return nodes in document order). 333 * @param namespaceHandle handle to node which must be of type 334 * NAMESPACE_NODE. 335 * NEEDSDOC @param inScope 336 * @return handle of next namespace, 337 * or DTM.NULL to indicate none exists. 338 */ 339 public int getNextNamespaceNode(int baseHandle, int namespaceHandle, 340 boolean inScope); 341 342 /** 343 * Given a node handle, find its parent node. 344 * 345 * @param nodeHandle the id of the node. 346 * @return int Node handle of parent, 347 * or DTM.NULL to indicate none exists. 348 */ 349 public int getParent(int nodeHandle); 350 351 /** 352 * Given a DTM which contains only a single document, 353 * find the Node Handle of the Document node. Note 354 * that if the DTM is configured so it can contain multiple 355 * documents, this call will return the Document currently 356 * under construction -- but may return null if it's between 357 * documents. Generally, you should use getOwnerDocument(nodeHandle) 358 * or getDocumentRoot(nodeHandle) instead. 359 * 360 * @return int Node handle of document, or DTM.NULL if a shared DTM 361 * can not tell us which Document is currently active. 362 */ 363 public int getDocument(); 364 365 /** 366 * Given a node handle, find the owning document node. This version mimics 367 * the behavior of the DOM call by the same name. 368 * 369 * @param nodeHandle the id of the node. 370 * @return int Node handle of owning document, or DTM.NULL if the node was 371 * a Document. 372 * @see #getDocumentRoot(int nodeHandle) 373 */ 374 public int getOwnerDocument(int nodeHandle); 375 376 /** 377 * Given a node handle, find the owning document node. 
378 * 379 * @param nodeHandle the id of the node. 380 * @return int Node handle of owning document, or the node itself if it was 381 * a Document. (Note difference from DOM, where getOwnerDocument returns 382 * null for the Document node.) 383 * @see #getOwnerDocument(int nodeHandle) 384 */ 385 public int getDocumentRoot(int nodeHandle); 386 387 /** 388 * Get the string-value of a node as a String object 389 * (see http://www.w3.org/TR/xpath#data-model 390 * for the definition of a node's string-value). 391 * 392 * @param nodeHandle The node ID. 393 * 394 * @return A string object that represents the string-value of the given node. 395 */ 396 public XMLString getStringValue(int nodeHandle); 397 398 /** 399 * Get number of character array chunks in 400 * the string-value of a node. 401 * (see http://www.w3.org/TR/xpath#data-model 402 * for the definition of a node's string-value). 403 * Note that a single text node may have multiple text chunks. 404 * 405 * @param nodeHandle The node ID. 406 * 407 * @return number of character array chunks in 408 * the string-value of a node. 409 */ 410 public int getStringValueChunkCount(int nodeHandle); 411 412 /** 413 * Get a character array chunk in the string-value of a node. 414 * (see http://www.w3.org/TR/xpath#data-model 415 * for the definition of a node's string-value). 416 * Note that a single text node may have multiple text chunks. 417 * 418 * @param nodeHandle The node ID. 419 * @param chunkIndex Which chunk to get. 420 * @param startAndLen A two-integer array which, upon return, WILL 421 * BE FILLED with values representing the chunk's start position 422 * within the returned character buffer and the length of the chunk. 423 * @return The character array buffer within which the chunk occurs, 424 * setting startAndLen's contents as a side-effect. 
425 */ 426 public char[] getStringValueChunk(int nodeHandle, int chunkIndex, 427 int[] startAndLen); 428 429 /** 430 * Given a node handle, return an ID that represents the node's expanded name. 431 * 432 * @param nodeHandle The handle to the node in question. 433 * 434 * @return the expanded-name id of the node. 435 */ 436 public int getExpandedTypeID(int nodeHandle); 437 438 /** 439 * Given an expanded name, return an ID. If the expanded-name does not 440 * exist in the internal tables, the entry will be created, and the ID will 441 * be returned. Any additional nodes that are created that have this 442 * expanded name will use this ID. 443 * 444 * NEEDSDOC @param namespace 445 * NEEDSDOC @param localName 446 * NEEDSDOC @param type 447 * 448 * @return the expanded-name id of the node. 449 */ 450 public int getExpandedTypeID(String namespace, String localName, int type); 451 452 /** 453 * Given an expanded-name ID, return the local name part. 454 * 455 * @param ExpandedNameID an ID that represents an expanded-name. 456 * @return String Local name of this node. 457 */ 458 public String getLocalNameFromExpandedNameID(int ExpandedNameID); 459 460 /** 461 * Given an expanded-name ID, return the namespace URI part. 462 * 463 * @param ExpandedNameID an ID that represents an expanded-name. 464 * @return String URI value of this node's namespace, or null if no 465 * namespace was resolved. 466 */ 467 public String getNamespaceFromExpandedNameID(int ExpandedNameID); 468 469 /** 470 * Given a node handle, return its DOM-style node name. This will 471 * include names such as #text or #document. 472 * 473 * @param nodeHandle the id of the node. 474 * @return String Name of this node, which may be an empty string. 475 * %REVIEW% Document when empty string is possible... 476 */ 477 public String getNodeName(int nodeHandle); 478 479 /** 480 * Given a node handle, return the XPath node name. 
This should be 481 * the name as described by the XPath data model, NOT the DOM-style 482 * name. 483 * 484 * @param nodeHandle the id of the node. 485 * @return String Name of this node. 486 */ 487 public String getNodeNameX(int nodeHandle); 488 489 /** 490 * Given a node handle, return its DOM-style localname. 491 * (As defined in Namespaces, this is the portion of the name after the 492 * prefix, if present, or the whole node name if no prefix exists) 493 * 494 * @param nodeHandle the id of the node. 495 * @return String Local name of this node. 496 */ 497 public String getLocalName(int nodeHandle); 498 499 /** 500 * Given a namespace handle, return the prefix that the namespace decl is 501 * mapping. 502 * Given a node handle, return the prefix used to map to the namespace. 503 * (As defined in Namespaces, this is the portion of the name before any 504 * colon character). 505 * 506 * <p> %REVIEW% Are you sure you want "" for no prefix? </p> 507 * 508 * @param nodeHandle the id of the node. 509 * @return String prefix of this node's name, or "" if no explicit 510 * namespace prefix was given. 511 */ 512 public String getPrefix(int nodeHandle); 513 514 /** 515 * Given a node handle, return its DOM-style namespace URI 516 * (As defined in Namespaces, this is the declared URI which this node's 517 * prefix -- or default in lieu thereof -- was mapped to.) 518 * @param nodeHandle the id of the node. 519 * @return String URI value of this node's namespace, or null if no 520 * namespace was resolved. 521 */ 522 public String getNamespaceURI(int nodeHandle); 523 524 /** 525 * Given a node handle, return its node value. This is mostly 526 * as defined by the DOM, but may ignore some conveniences. 527 * <p> 528 * @param nodeHandle The node id. 529 * @return String Value of this node, or null if not 530 * meaningful for this node type. 531 */ 532 public String getNodeValue(int nodeHandle); 533 534 /** 535 * Given a node handle, return its DOM-style node type. 
536 * 537 * <p>%REVIEW% Generally, returning short is false economy. Return int?</p> 538 * 539 * @param nodeHandle The node id. 540 * @return int Node type, as per the DOM's Node._NODE constants. 541 */ 542 public short getNodeType(int nodeHandle); 543 544 /** 545 * Get the depth level of this node in the tree (equals 1 for 546 * a parentless node). 547 * 548 * @param nodeHandle The node id. 549 * @return the number of ancestors, plus one 550 * @xsl.usage internal 551 */ 552 public short getLevel(int nodeHandle); 553 554 // ============== Document query functions ============== 555 556 /** 557 * Tests whether DTM DOM implementation implements a specific feature and 558 * that feature is supported by this node. 559 * @param feature The name of the feature to test. 560 * @param version This is the version number of the feature to test. 561 * If the version is not 562 * specified, supporting any version of the feature will cause the 563 * method to return <code>true</code>. 564 * @return Returns <code>true</code> if the specified feature is 565 * supported on this node, <code>false</code> otherwise. 566 */ 567 public boolean isSupported(String feature, String version); 568 569 /** 570 * Return the base URI of the document entity. If it is not known 571 * (because the document was parsed from a socket connection or from 572 * standard input, for example), the value of this property is unknown. 573 * 574 * @return the document base URI String object or null if unknown. 575 */ 576 public String getDocumentBaseURI(); 577 578 /** 579 * Set the base URI of the document entity. 580 * 581 * @param baseURI the document base URI String object or null if unknown. 582 */ 583 public void setDocumentBaseURI(String baseURI); 584 585 /** 586 * Return the system identifier of the document entity. If 587 * it is not known, the value of this property is null. 588 * 589 * @param nodeHandle The node id, which can be any valid node handle. 
590 * @return the system identifier String object or null if unknown. 591 */ 592 public String getDocumentSystemIdentifier(int nodeHandle); 593 594 /** 595 * Return the name of the character encoding scheme 596 * in which the document entity is expressed. 597 * 598 * @param nodeHandle The node id, which can be any valid node handle. 599 * @return the document encoding String object. 600 */ 601 public String getDocumentEncoding(int nodeHandle); 602 603 /** 604 * Return an indication of the standalone status of the document, 605 * either "yes" or "no". This property is derived from the optional 606 * standalone document declaration in the XML declaration at the 607 * beginning of the document entity, and has no value if there is no 608 * standalone document declaration. 609 * 610 * @param nodeHandle The node id, which can be any valid node handle. 611 * @return the document standalone String object, either "yes", "no", or null. 612 */ 613 public String getDocumentStandalone(int nodeHandle); 614 615 /** 616 * Return a string representing the XML version of the document. This 617 * property is derived from the XML declaration optionally present at the 618 * beginning of the document entity, and has no value if there is no XML 619 * declaration. 620 * 621 * @param documentHandle the document handle 622 * @return the document version String object 623 */ 624 public String getDocumentVersion(int documentHandle); 625 626 /** 627 * Return an indication of 628 * whether the processor has read the complete DTD. Its value is a 629 * boolean. If it is false, then certain properties (indicated in their 630 * descriptions below) may be unknown. If it is true, those properties 631 * are never unknown. 632 * 633 * @return <code>true</code> if all declarations were processed; 634 * <code>false</code> otherwise. 635 */ 636 public boolean getDocumentAllDeclarationsProcessed(); 637 638 /** 639 * A document type declaration information item has the following properties: 640 * 641 * 1. 
[system identifier] The system identifier of the external subset, if 642 * it exists. Otherwise this property has no value. 643 * 644 * @return the system identifier String object, or null if there is none. 645 */ 646 public String getDocumentTypeDeclarationSystemIdentifier(); 647 648 /** 649 * Return the public identifier of the external subset, 650 * normalized as described in 4.2.2 External Entities [XML]. If there is 651 * no external subset or if it has no public identifier, this property 652 * has no value. 653 * 654 * @return the public identifier String object, or null if there is none. 655 */ 656 public String getDocumentTypeDeclarationPublicIdentifier(); 657 658 /** 659 * Returns the <code>Element</code> whose <code>ID</code> is given by 660 * <code>elementId</code>. If no such element exists, returns 661 * <code>DTM.NULL</code>. Behavior is not defined if more than one element 662 * has this <code>ID</code>. Attributes (including those 663 * with the name "ID") are not of type ID unless so defined by DTD/Schema 664 * information available to the DTM implementation. 665 * Implementations that do not know whether attributes are of type ID or 666 * not are expected to return <code>DTM.NULL</code>. 667 * 668 * <p>%REVIEW% Presumably IDs are still scoped to a single document, 669 * and this operation searches only within a single document, right? 670 * Wouldn't want collisions between DTMs in the same process.</p> 671 * 672 * @param elementId The unique <code>id</code> value for an element. 673 * @return The handle of the matching element. 674 */ 675 public int getElementById(String elementId); 676 677 /** 678 * The getUnparsedEntityURI function returns the URI of the unparsed 679 * entity with the specified name in the same document as the context 680 * node (see [3.3 Unparsed Entities]). It returns the empty string if 681 * there is no such entity. 
682 * <p> 683 * XML processors may choose to use the System Identifier (if one 684 * is provided) to resolve the entity, rather than the URI in the 685 * Public Identifier. The details are dependent on the processor, and 686 * we would have to support some form of plug-in resolver to handle 687 * this properly. Currently, we simply return the System Identifier if 688 * present, and hope that it a usable URI or that our caller can 689 * map it to one. 690 * %REVIEW% Resolve Public Identifiers... or consider changing function name. 691 * <p> 692 * If we find a relative URI 693 * reference, XML expects it to be resolved in terms of the base URI 694 * of the document. The DOM doesn't do that for us, and it isn't 695 * entirely clear whether that should be done here; currently that's 696 * pushed up to a higher level of our application. (Note that DOM Level 697 * 1 didn't store the document's base URI.) 698 * %REVIEW% Consider resolving Relative URIs. 699 * <p> 700 * (The DOM's statement that "An XML processor may choose to 701 * completely expand entities before the structure model is passed 702 * to the DOM" refers only to parsed entities, not unparsed, and hence 703 * doesn't affect this function.) 704 * 705 * @param name A string containing the Entity Name of the unparsed 706 * entity. 707 * 708 * @return String containing the URI of the Unparsed Entity, or an 709 * empty string if no such entity exists. 710 */ 711 public String getUnparsedEntityURI(String name); 712 713 // ============== Boolean methods ================ 714 715 /** 716 * Return true if the xsl:strip-space or xsl:preserve-space was processed 717 * during construction of the document contained in this DTM. 718 * 719 * NEEDSDOC ($objectName$) @return 720 */ 721 public boolean supportsPreStripping(); 722 723 /** 724 * Figure out whether nodeHandle2 should be considered as being later 725 * in the document than nodeHandle1, in Document Order as defined 726 * by the XPath model. 
This may not agree with the ordering defined 727 * by other XML applications. 728 * <p> 729 * There are some cases where ordering isn't defined, and neither are 730 * the results of this function -- though we'll generally return true. 731 * <p> 732 * %REVIEW% Make sure this does the right thing with attribute nodes!!! 733 * <p> 734 * %REVIEW% Consider renaming for clarity. Perhaps isDocumentOrder(a,b)? 735 * 736 * @param firstNodeHandle DOM Node to perform position comparison on. 737 * @param secondNodeHandle DOM Node to perform position comparison on. 738 * 739 * @return false if secondNode comes before firstNode, otherwise return true. 740 * You can think of this as 741 * <code>(firstNode.documentOrderPosition <= secondNode.documentOrderPosition)</code>. 742 */ 743 public boolean isNodeAfter(int firstNodeHandle, int secondNodeHandle); 744 745 /** 746 * 2. [element content whitespace] A boolean indicating whether a 747 * text node represents white space appearing within element content 748 * (see [XML], 2.10 "White Space Handling"). Note that validating 749 * XML processors are required by XML 1.0 to provide this 750 * information... but that DOM Level 2 did not support it, since it 751 * depends on knowledge of the DTD which DOM2 could not guarantee 752 * would be available. 753 * <p> 754 * If there is no declaration for the containing element, an XML 755 * processor must assume that the whitespace could be meaningful and 756 * return false. If no declaration has been read, but the [all 757 * declarations processed] property of the document information item 758 * is false (so there may be an unread declaration), then the value 759 * of this property is indeterminate for white space characters and 760 * should probably be reported as false. It is always false for text 761 * nodes that contain anything other than (or in addition to) white 762 * space. 763 * <p> 764 * Note too that it always returns false for non-Text nodes. 
765 * <p> 766 * %REVIEW% Joe wants to rename this isWhitespaceInElementContent() for clarity 767 * 768 * @param nodeHandle the node ID. 769 * @return <code>true</code> if the node definitely represents whitespace in 770 * element content; <code>false</code> otherwise. 771 */ 772 public boolean isCharacterElementContentWhitespace(int nodeHandle); 773 774 /** 775 * 10. [all declarations processed] This property is not strictly speaking 776 * part of the infoset of the document. Rather it is an indication of 777 * whether the processor has read the complete DTD. Its value is a 778 * boolean. If it is false, then certain properties (indicated in their 779 * descriptions below) may be unknown. If it is true, those properties 780 * are never unknown. 781 * 782 * @param documentHandle A node handle that must identify a document. 783 * @return <code>true</code> if all declarations were processed; 784 * <code>false</code> otherwise. 785 */ 786 public boolean isDocumentAllDeclarationsProcessed(int documentHandle); 787 788 /** 789 * 5. [specified] A flag indicating whether this attribute was actually 790 * specified in the start-tag of its element, or was defaulted from the 791 * DTD (or schema). 792 * 793 * @param attributeHandle The attribute handle 794 * @return <code>true</code> if the attribute was specified; 795 * <code>false</code> if it was defaulted or the handle doesn't 796 * refer to an attribute node. 797 */ 798 public boolean isAttributeSpecified(int attributeHandle); 799 800 // ========== Direct SAX Dispatch, for optimization purposes ======== 801 802 /** 803 * Directly call the 804 * characters method on the passed ContentHandler for the 805 * string-value of the given node (see http://www.w3.org/TR/xpath#data-model 806 * for the definition of a node's string-value). Multiple calls to the 807 * ContentHandler's characters methods may well occur for a single call to 808 * this method. 809 * 810 * @param nodeHandle The node ID. 
811 * @param ch A non-null reference to a ContentHandler. 812 * @param normalize true if the content should be normalized according to 813 * the rules for the XPath 814 * <a href="http://www.w3.org/TR/xpath#function-normalize-space">normalize-space</a> 815 * function. 816 * 817 * @throws org.xml.sax.SAXException 818 */ 819 public void dispatchCharactersEvents( 820 int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize) 821 throws org.xml.sax.SAXException; 822 823 /** 824 * Directly create SAX parser events representing the XML content of 825 * a DTM subtree. This is a "serialize" operation. 826 * 827 * @param nodeHandle The node ID. 828 * @param ch A non-null reference to a ContentHandler. 829 * 830 * @throws org.xml.sax.SAXException 831 */ 832 public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) 833 throws org.xml.sax.SAXException; 834 835 /** 836 * Return an DOM node for the given node. 837 * 838 * @param nodeHandle The node ID. 839 * 840 * @return A node representation of the DTM node. 841 */ 842 public org.w3c.dom.Node getNode(int nodeHandle); 843 844 // ==== Construction methods (may not be supported by some implementations!) ===== 845 // %REVIEW% What response occurs if not supported? 846 847 /** 848 * @return true iff we're building this model incrementally (eg 849 * we're partnered with a CoroutineParser) and thus require that the 850 * transformation and the parse run simultaneously. Guidance to the 851 * DTMManager. 852 */ 853 public boolean needsTwoThreads(); 854 855 // %REVIEW% Do these appends make any sense, should we support a 856 // wider set of methods (like the "append" methods in the 857 // current DTMDocumentImpl draft), or should we just support SAX 858 // listener interfaces? Should it be a separate interface to 859 // make that distinction explicit? 860 861 /** 862 * Return this DTM's content handler, if it has one. 863 * 864 * @return null if this model doesn't respond to SAX events. 
865 */ 866 public org.xml.sax.ContentHandler getContentHandler(); 867 868 /** 869 * Return this DTM's lexical handler, if it has one. 870 * 871 * %REVIEW% Should this return null if constrution already done/begun? 872 * 873 * @return null if this model doesn't respond to lexical SAX events. 874 */ 875 public org.xml.sax.ext.LexicalHandler getLexicalHandler(); 876 877 /** 878 * Return this DTM's EntityResolver, if it has one. 879 * 880 * @return null if this model doesn't respond to SAX entity ref events. 881 */ 882 public org.xml.sax.EntityResolver getEntityResolver(); 883 884 /** 885 * Return this DTM's DTDHandler, if it has one. 886 * 887 * @return null if this model doesn't respond to SAX dtd events. 888 */ 889 public org.xml.sax.DTDHandler getDTDHandler(); 890 891 /** 892 * Return this DTM's ErrorHandler, if it has one. 893 * 894 * @return null if this model doesn't respond to SAX error events. 895 */ 896 public org.xml.sax.ErrorHandler getErrorHandler(); 897 898 /** 899 * Return this DTM's DeclHandler, if it has one. 900 * 901 * @return null if this model doesn't respond to SAX Decl events. 902 */ 903 public org.xml.sax.ext.DeclHandler getDeclHandler(); 904 905 /** 906 * Append a child to "the end of the document". Please note that 907 * the node is always cloned in a base DTM, since our basic behavior 908 * is immutable so nodes can't be removed from their previous 909 * location. 910 * 911 * <p> %REVIEW% DTM maintains an insertion cursor which 912 * performs a depth-first tree walk as nodes come in, and this operation 913 * is really equivalent to: 914 * insertionCursor.appendChild(document.importNode(newChild))) 915 * where the insert point is the last element that was appended (or 916 * the last one popped back to by an end-element operation).</p> 917 * 918 * @param newChild Must be a valid new node handle. 919 * @param clone true if the child should be cloned into the document. 
* @param cloneDepth if the clone argument is true, specifies that the
   *        clone should include all its children.
   */
  public void appendChild(int newChild, boolean clone, boolean cloneDepth);

  /**
   * Append a text node child that will be constructed from a string,
   * to the end of the document. Behavior is otherwise like appendChild().
   *
   * @param str Non-null reference to a string.
   */
  public void appendTextChild(String str);

  /**
   * Get the location of a node in the source document.
   *
   * @param node the node to locate, given as its <code>int</code> handle.
   * @return a <code>SourceLocator</code> value, or <code>null</code> if no
   *         location is available.
   */
  public SourceLocator getSourceLocatorFor(int node);

  /**
   * As the DTM is registered with the DTMManager, this method
   * will be called. This will give the DTM implementation a
   * chance to initialize any subsystems that are required to
   * build the DTM.
   */
  public void documentRegistration();

  /**
   * As documents are released from the DTMManager, the DTM implementation
   * will be notified of the event. This will allow the DTM implementation
   * to shut down any subsystem activity that may have been associated with
   * the active DTM implementation.
   */
  public void documentRelease();

  /**
   * Migrate a DTM built with an old DTMManager to a new DTMManager.
   * After the migration, the new DTMManager will treat the DTM as
   * one that is built by itself.
   * This is used to support DTM sharing between multiple transformations.
   *
   * @param manager the new DTMManager that takes over this DTM.
   */
  public void migrateTo(DTMManager manager);
}