Home | History | Annotate | Download | only in dom2dtm
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: DOM2DTM.java 478671 2006-11-23 21:00:31Z minchau $
     20  */
     21 package org.apache.xml.dtm.ref.dom2dtm;
     22 
     23 import java.util.Vector;
     24 
     25 import javax.xml.transform.SourceLocator;
     26 import javax.xml.transform.dom.DOMSource;
     27 
     28 import org.apache.xml.dtm.DTM;
     29 import org.apache.xml.dtm.DTMManager;
     30 import org.apache.xml.dtm.DTMWSFilter;
     31 import org.apache.xml.dtm.ref.DTMDefaultBaseIterators;
     32 import org.apache.xml.dtm.ref.DTMManagerDefault;
     33 import org.apache.xml.dtm.ref.ExpandedNameTable;
     34 import org.apache.xml.dtm.ref.IncrementalSAXSource;
     35 import org.apache.xml.res.XMLErrorResources;
     36 import org.apache.xml.res.XMLMessages;
     37 import org.apache.xml.utils.FastStringBuffer;
     38 import org.apache.xml.utils.QName;
     39 import org.apache.xml.utils.StringBufferPool;
     40 import org.apache.xml.utils.TreeWalker;
     41 import org.apache.xml.utils.XMLCharacterRecognizer;
     42 import org.apache.xml.utils.XMLString;
     43 import org.apache.xml.utils.XMLStringFactory;
     44 import org.w3c.dom.Attr;
     45 import org.w3c.dom.Document;
     46 import org.w3c.dom.DocumentType;
     47 import org.w3c.dom.Element;
     48 import org.w3c.dom.Entity;
     49 import org.w3c.dom.NamedNodeMap;
     50 import org.w3c.dom.Node;
     51 import org.xml.sax.ContentHandler;
     52 
     53 /** The <code>DOM2DTM</code> class serves up a DOM's contents via the
     54  * DTM API.
     55  *
     56  * Note that it doesn't necessarily represent a full Document
     57  * tree. You can wrap a DOM2DTM around a specific node and its subtree
     58  * and the right things should happen. (I don't _think_ we currently
     59  * support DocumentFragment nodes as roots, though that might be worth
     60  * considering.)
     61  *
     62  * Note too that we do not currently attempt to track document
     63  * mutation. If you alter the DOM after wrapping DOM2DTM around it,
     64  * all bets are off.
     65  * */
     66 public class DOM2DTM extends DTMDefaultBaseIterators
     67 {
     68   static final boolean JJK_DEBUG=false;
     69   static final boolean JJK_NEWCODE=true;
     70 
     71   /** Manefest constant
     72    */
     73   static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace";
     74 
     75   /** The current position in the DOM tree. Last node examined for
     76    * possible copying to DTM. */
     77   transient private Node m_pos;
     78   /** The current position in the DTM tree. Who children get appended to. */
     79   private int m_last_parent=0;
     80   /** The current position in the DTM tree. Who children reference as their
     81    * previous sib. */
     82   private int m_last_kid=NULL;
     83 
     84   /** The top of the subtree.
     85    * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.'
     86    * */
     87   transient private Node m_root;
     88 
     89   /** True iff the first element has been processed. This is used to control
     90       synthesis of the implied xml: namespace declaration node. */
     91   boolean m_processedFirstElement=false;
     92 
     93   /** true if ALL the nodes in the m_root subtree have been processed;
     94    * false if our incremental build has not yet finished scanning the
     95    * DOM tree.  */
     96   transient private boolean m_nodesAreProcessed;
     97 
     98   /** The node objects.  The instance part of the handle indexes
     99    * directly into this vector.  Each DTM node may actually be
    100    * composed of several DOM nodes (for example, if logically-adjacent
    101    * Text/CDATASection nodes in the DOM have been coalesced into a
    102    * single DTM Text node); this table points only to the first in
    103    * that sequence. */
    104   protected Vector m_nodes = new Vector();
    105 
    106   /**
    107    * Construct a DOM2DTM object from a DOM node.
    108    *
    109    * @param mgr The DTMManager who owns this DTM.
    110    * @param domSource the DOM source that this DTM will wrap.
    111    * @param dtmIdentity The DTM identity ID for this DTM.
    112    * @param whiteSpaceFilter The white space filter for this DTM, which may
    113    *                         be null.
    114    * @param xstringfactory XMLString factory for creating character content.
    115    * @param doIndexing true if the caller considers it worth it to use
    116    *                   indexing schemes.
    117    */
    118   public DOM2DTM(DTMManager mgr, DOMSource domSource,
    119                  int dtmIdentity, DTMWSFilter whiteSpaceFilter,
    120                  XMLStringFactory xstringfactory,
    121                  boolean doIndexing)
    122   {
    123     super(mgr, domSource, dtmIdentity, whiteSpaceFilter,
    124           xstringfactory, doIndexing);
    125 
    126     // Initialize DOM navigation
    127     m_pos=m_root = domSource.getNode();
    128     // Initialize DTM navigation
    129     m_last_parent=m_last_kid=NULL;
    130     m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL);
    131 
    132     // Apparently the domSource root may not actually be the
    133     // Document node. If it's an Element node, we need to immediately
    134     // add its attributes. Adapted from nextNode().
    135     // %REVIEW% Move this logic into addNode and recurse? Cleaner!
    136     //
    137     // (If it's an EntityReference node, we're probably in
    138     // seriously bad trouble. For now
    139     // I'm just hoping nobody is ever quite that foolish... %REVIEW%)
    140 		//
    141 		// %ISSUE% What about inherited namespaces in this case?
    142 		// Do we need to special-case initialize them into the DTM model?
    143     if(ELEMENT_NODE == m_root.getNodeType())
    144     {
    145       NamedNodeMap attrs=m_root.getAttributes();
    146       int attrsize=(attrs==null) ? 0 : attrs.getLength();
    147       if(attrsize>0)
    148       {
    149         int attrIndex=NULL; // start with no previous sib
    150         for(int i=0;i<attrsize;++i)
    151         {
    152           // No need to force nodetype in this case;
    153           // addNode() will take care of switching it from
    154           // Attr to Namespace if necessary.
    155           attrIndex=addNode(attrs.item(i),0,attrIndex,NULL);
    156           m_firstch.setElementAt(DTM.NULL,attrIndex);
    157         }
    158         // Terminate list of attrs, and make sure they aren't
    159         // considered children of the element
    160         m_nextsib.setElementAt(DTM.NULL,attrIndex);
    161 
    162         // IMPORTANT: This does NOT change m_last_parent or m_last_kid!
    163       } // if attrs exist
    164     } //if(ELEMENT_NODE)
    165 
    166     // Initialize DTM-completed status
    167     m_nodesAreProcessed = false;
    168   }
    169 
    170   /**
    171    * Construct the node map from the node.
    172    *
    173    * @param node The node that is to be added to the DTM.
    174    * @param parentIndex The current parent index.
    175    * @param previousSibling The previous sibling index.
    176    * @param forceNodeType If not DTM.NULL, overrides the DOM node type.
    177    *	Used to force nodes to Text rather than CDATASection when their
    178    *	coalesced value includes ordinary Text nodes (current DTM behavior).
    179    *
    180    * @return The index identity of the node that was added.
    181    */
    182   protected int addNode(Node node, int parentIndex,
    183                         int previousSibling, int forceNodeType)
    184   {
    185     int nodeIndex = m_nodes.size();
    186 
    187     // Have we overflowed a DTM Identity's addressing range?
    188     if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS))
    189     {
    190       try
    191       {
    192         if(m_mgr==null)
    193           throw new ClassCastException();
    194 
    195                                 // Handle as Extended Addressing
    196         DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr;
    197         int id=mgrD.getFirstFreeDTMID();
    198         mgrD.addDTM(this,id,nodeIndex);
    199         m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS);
    200       }
    201       catch(ClassCastException e)
    202       {
    203         // %REVIEW% Wrong error message, but I've been told we're trying
    204         // not to add messages right not for I18N reasons.
    205         // %REVIEW% Should this be a Fatal Error?
    206         error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available";
    207       }
    208     }
    209 
    210     m_size++;
    211     // ensureSize(nodeIndex);
    212 
    213     int type;
    214     if(NULL==forceNodeType)
    215         type = node.getNodeType();
    216     else
    217         type=forceNodeType;
    218 
    219     // %REVIEW% The Namespace Spec currently says that Namespaces are
    220     // processed in a non-namespace-aware manner, by matching the
    221     // QName, even though there is in fact a namespace assigned to
    222     // these nodes in the DOM. If and when that changes, we will have
    223     // to consider whether we check the namespace-for-namespaces
    224     // rather than the node name.
    225     //
    226     // %TBD% Note that the DOM does not necessarily explicitly declare
    227     // all the namespaces it uses. DOM Level 3 will introduce a
    228     // namespace-normalization operation which reconciles that, and we
    229     // can request that users invoke it or otherwise ensure that the
    230     // tree is namespace-well-formed before passing the DOM to Xalan.
    231     // But if they don't, what should we do about it? We probably
    232     // don't want to alter the source DOM (and may not be able to do
    233     // so if it's read-only). The best available answer might be to
    234     // synthesize additional DTM Namespace Nodes that don't correspond
    235     // to DOM Attr Nodes.
    236     if (Node.ATTRIBUTE_NODE == type)
    237     {
    238       String name = node.getNodeName();
    239 
    240       if (name.startsWith("xmlns:") || name.equals("xmlns"))
    241       {
    242         type = DTM.NAMESPACE_NODE;
    243       }
    244     }
    245 
    246     m_nodes.addElement(node);
    247 
    248     m_firstch.setElementAt(NOTPROCESSED,nodeIndex);
    249     m_nextsib.setElementAt(NOTPROCESSED,nodeIndex);
    250     m_prevsib.setElementAt(previousSibling,nodeIndex);
    251     m_parent.setElementAt(parentIndex,nodeIndex);
    252 
    253     if(DTM.NULL != parentIndex &&
    254        type != DTM.ATTRIBUTE_NODE &&
    255        type != DTM.NAMESPACE_NODE)
    256     {
    257       // If the DTM parent had no children, this becomes its first child.
    258       if(NOTPROCESSED == m_firstch.elementAt(parentIndex))
    259         m_firstch.setElementAt(nodeIndex,parentIndex);
    260     }
    261 
    262     String nsURI = node.getNamespaceURI();
    263 
    264     // Deal with the difference between Namespace spec and XSLT
    265     // definitions of local name. (The former says PIs don't have
    266     // localnames; the latter says they do.)
    267     String localName =  (type == Node.PROCESSING_INSTRUCTION_NODE) ?
    268                          node.getNodeName() :
    269                          node.getLocalName();
    270 
    271     // Hack to make DOM1 sort of work...
    272     if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE))
    273         && null == localName)
    274       localName = node.getNodeName(); // -sb
    275 
    276     ExpandedNameTable exnt = m_expandedNameTable;
    277 
    278     // %TBD% Nodes created with the old non-namespace-aware DOM
    279     // calls createElement() and createAttribute() will never have a
    280     // localname. That will cause their expandedNameID to be just the
    281     // nodeType... which will keep them from being matched
    282     // successfully by name. Since the DOM makes no promise that
    283     // those will participate in namespace processing, this is
    284     // officially accepted as Not Our Fault. But it might be nice to
    285     // issue a diagnostic message!
    286     if(node.getLocalName()==null &&
    287        (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE))
    288       {
    289         // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM.");
    290       }
    291 
    292     int expandedNameID = (null != localName)
    293        ? exnt.getExpandedTypeID(nsURI, localName, type) :
    294          exnt.getExpandedTypeID(type);
    295 
    296     m_exptype.setElementAt(expandedNameID,nodeIndex);
    297 
    298     indexNode(expandedNameID, nodeIndex);
    299 
    300     if (DTM.NULL != previousSibling)
    301       m_nextsib.setElementAt(nodeIndex,previousSibling);
    302 
    303     // This should be done after m_exptype has been set, and probably should
    304     // always be the last thing we do
    305     if (type == DTM.NAMESPACE_NODE)
    306         declareNamespaceInContext(parentIndex,nodeIndex);
    307 
    308     return nodeIndex;
    309   }
    310 
    311   /**
    312    * Get the number of nodes that have been added.
    313    */
    314   public int getNumberOfNodes()
    315   {
    316     return m_nodes.size();
    317   }
    318 
    319  /**
    320    * This method iterates to the next node that will be added to the table.
    321    * Each call to this method adds a new node to the table, unless the end
    322    * is reached, in which case it returns null.
    323    *
    324    * @return The true if a next node is found or false if
    325    *         there are no more nodes.
    326    */
    327   protected boolean nextNode()
    328   {
    329     // Non-recursive one-fetch-at-a-time depth-first traversal with
    330     // attribute/namespace nodes and white-space stripping.
    331     // Navigating the DOM is simple, navigating the DTM is simple;
    332     // keeping track of both at once is a trifle baroque but at least
    333     // we've avoided most of the special cases.
    334     if (m_nodesAreProcessed)
    335       return false;
    336 
    337     // %REVIEW% Is this local copy Really Useful from a performance
    338     // point of view?  Or is this a false microoptimization?
    339     Node pos=m_pos;
    340     Node next=null;
    341     int nexttype=NULL;
    342 
    343     // Navigate DOM tree
    344     do
    345       {
    346         // Look down to first child.
    347         if (pos.hasChildNodes())
    348           {
    349             next = pos.getFirstChild();
    350 
    351             // %REVIEW% There's probably a more elegant way to skip
    352             // the doctype. (Just let it go and Suppress it?
    353             if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
    354               next=next.getNextSibling();
    355 
    356             // Push DTM context -- except for children of Entity References,
    357             // which have no DTM equivalent and cause no DTM navigation.
    358             if(ENTITY_REFERENCE_NODE!=pos.getNodeType())
    359               {
    360                 m_last_parent=m_last_kid;
    361                 m_last_kid=NULL;
    362                 // Whitespace-handler context stacking
    363                 if(null != m_wsfilter)
    364                 {
    365                   short wsv =
    366                     m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this);
    367                   boolean shouldStrip = (DTMWSFilter.INHERIT == wsv)
    368                     ? getShouldStripWhitespace()
    369                     : (DTMWSFilter.STRIP == wsv);
    370                   pushShouldStripWhitespace(shouldStrip);
    371                 } // if(m_wsfilter)
    372               }
    373           }
    374 
    375         // If that fails, look up and right (but not past root!)
    376         else
    377           {
    378             if(m_last_kid!=NULL)
    379               {
    380                 // Last node posted at this level had no more children
    381                 // If it has _no_ children, we need to record that.
    382                 if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED)
    383                   m_firstch.setElementAt(NULL,m_last_kid);
    384               }
    385 
    386             while(m_last_parent != NULL)
    387               {
    388                 // %REVIEW% There's probably a more elegant way to
    389                 // skip the doctype. (Just let it go and Suppress it?
    390                 next = pos.getNextSibling();
    391                 if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType())
    392                   next=next.getNextSibling();
    393 
    394                 if(next!=null)
    395                   break; // Found it!
    396 
    397                 // No next-sibling found. Pop the DOM.
    398                 pos=pos.getParentNode();
    399                 if(pos==null)
    400                   {
    401                     // %TBD% Should never arise, but I want to be sure of that...
    402                     if(JJK_DEBUG)
    403                       {
    404                         System.out.println("***** DOM2DTM Pop Control Flow problem");
    405                         for(;;); // Freeze right here!
    406                       }
    407                   }
    408 
    409                 // The only parents in the DTM are Elements.  However,
    410                 // the DOM could contain EntityReferences.  If we
    411                 // encounter one, pop it _without_ popping DTM.
    412                 if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType())
    413                   {
    414                     // Nothing needs doing
    415                     if(JJK_DEBUG)
    416                       System.out.println("***** DOM2DTM popping EntRef");
    417                   }
    418                 else
    419                   {
    420                     popShouldStripWhitespace();
    421                     // Fix and pop DTM
    422                     if(m_last_kid==NULL)
    423                       m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element
    424                     else
    425                       m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else
    426                     m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent);
    427                   }
    428               }
    429             if(m_last_parent==NULL)
    430               next=null;
    431           }
    432 
    433         if(next!=null)
    434           nexttype=next.getNodeType();
    435 
    436         // If it's an entity ref, advance past it.
    437         //
    438         // %REVIEW% Should we let this out the door and just suppress it?
    439         // More work, but simpler code, more likely to be correct, and
    440         // it doesn't happen very often. We'd get rid of the loop too.
    441         if (ENTITY_REFERENCE_NODE == nexttype)
    442           pos=next;
    443       }
    444     while (ENTITY_REFERENCE_NODE == nexttype);
    445 
    446     // Did we run out of the tree?
    447     if(next==null)
    448       {
    449         m_nextsib.setElementAt(NULL,0);
    450         m_nodesAreProcessed = true;
    451         m_pos=null;
    452 
    453         if(JJK_DEBUG)
    454           {
    455             System.out.println("***** DOM2DTM Crosscheck:");
    456             for(int i=0;i<m_nodes.size();++i)
    457               System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i));
    458           }
    459 
    460         return false;
    461       }
    462 
    463     // Text needs some special handling:
    464     //
    465     // DTM may skip whitespace. This is handled by the suppressNode flag, which
    466     // when true will keep the DTM node from being created.
    467     //
    468     // DTM only directly records the first DOM node of any logically-contiguous
    469     // sequence. The lastTextNode value will be set to the last node in the
    470     // contiguous sequence, and -- AFTER the DTM addNode -- can be used to
    471     // advance next over this whole block. Should be simpler than special-casing
    472     // the above loop for "Was the logically-preceeding sibling a text node".
    473     //
    474     // Finally, a DTM node should be considered a CDATASection only if all the
    475     // contiguous text it covers is CDATASections. The first Text should
    476     // force DTM to Text.
    477 
    478     boolean suppressNode=false;
    479     Node lastTextNode=null;
    480 
    481     nexttype=next.getNodeType();
    482 
    483     // nexttype=pos.getNodeType();
    484     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
    485       {
    486         // If filtering, initially assume we're going to suppress the node
    487         suppressNode=((null != m_wsfilter) && getShouldStripWhitespace());
    488 
    489         // Scan logically contiguous text (siblings, plus "flattening"
    490         // of entity reference boundaries).
    491         Node n=next;
    492         while(n!=null)
    493           {
    494             lastTextNode=n;
    495             // Any Text node means DTM considers it all Text
    496             if(TEXT_NODE == n.getNodeType())
    497               nexttype=TEXT_NODE;
    498             // Any non-whitespace in this sequence blocks whitespace
    499             // suppression
    500             suppressNode &=
    501               XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue());
    502 
    503             n=logicalNextDOMTextNode(n);
    504           }
    505       }
    506 
    507     // Special handling for PIs: Some DOMs represent the XML
    508     // Declaration as a PI. This is officially incorrect, per the DOM
    509     // spec, but is considered a "wrong but tolerable" temporary
    510     // workaround pending proper handling of these fields in DOM Level
    511     // 3. We want to recognize and reject that case.
    512     else if(PROCESSING_INSTRUCTION_NODE==nexttype)
    513       {
    514         suppressNode = (pos.getNodeName().toLowerCase().equals("xml"));
    515       }
    516 
    517 
    518     if(!suppressNode)
    519       {
    520         // Inserting next. NOTE that we force the node type; for
    521         // coalesced Text, this records CDATASections adjacent to
    522         // ordinary Text as Text.
    523         int nextindex=addNode(next,m_last_parent,m_last_kid,
    524 			      nexttype);
    525 
    526         m_last_kid=nextindex;
    527 
    528         if(ELEMENT_NODE == nexttype)
    529           {
    530             int attrIndex=NULL; // start with no previous sib
    531             // Process attributes _now_, rather than waiting.
    532             // Simpler control flow, makes NS cache available immediately.
    533             NamedNodeMap attrs=next.getAttributes();
    534             int attrsize=(attrs==null) ? 0 : attrs.getLength();
    535             if(attrsize>0)
    536               {
    537                 for(int i=0;i<attrsize;++i)
    538                   {
    539                     // No need to force nodetype in this case;
    540                     // addNode() will take care of switching it from
    541                     // Attr to Namespace if necessary.
    542                     attrIndex=addNode(attrs.item(i),
    543                                       nextindex,attrIndex,NULL);
    544                     m_firstch.setElementAt(DTM.NULL,attrIndex);
    545 
    546                     // If the xml: prefix is explicitly declared
    547                     // we don't need to synthesize one.
    548 		    //
    549 		    // NOTE that XML Namespaces were not originally
    550 		    // defined as being namespace-aware (grrr), and
    551 		    // while the W3C is planning to fix this it's
    552 		    // safer for now to test the QName and trust the
    553 		    // parsers to prevent anyone from redefining the
    554 		    // reserved xmlns: prefix
    555                     if(!m_processedFirstElement
    556                        && "xmlns:xml".equals(attrs.item(i).getNodeName()))
    557                       m_processedFirstElement=true;
    558                   }
    559                 // Terminate list of attrs, and make sure they aren't
    560                 // considered children of the element
    561               } // if attrs exist
    562             if(!m_processedFirstElement)
    563             {
    564               // The DOM might not have an explicit declaration for the
    565               // implicit "xml:" prefix, but the XPath data model
    566               // requires that this appear as a Namespace Node so we
    567               // have to synthesize one. You can think of this as
    568               // being a default attribute defined by the XML
    569               // Namespaces spec rather than by the DTD.
    570               attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode(
    571 																	(Element)next,"xml",NAMESPACE_DECL_NS,
    572 																	makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1)
    573 																	),
    574                                 nextindex,attrIndex,NULL);
    575               m_firstch.setElementAt(DTM.NULL,attrIndex);
    576               m_processedFirstElement=true;
    577             }
    578             if(attrIndex!=NULL)
    579               m_nextsib.setElementAt(DTM.NULL,attrIndex);
    580           } //if(ELEMENT_NODE)
    581       } // (if !suppressNode)
    582 
    583     // Text postprocessing: Act on values stored above
    584     if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype)
    585       {
    586         // %TBD% If nexttype was forced to TEXT, patch the DTM node
    587 
    588         next=lastTextNode;      // Advance the DOM cursor over contiguous text
    589       }
    590 
    591     // Remember where we left off.
    592     m_pos=next;
    593     return true;
    594   }
    595 
    596 
    597   /**
    598    * Return an DOM node for the given node.
    599    *
    600    * @param nodeHandle The node ID.
    601    *
    602    * @return A node representation of the DTM node.
    603    */
    604   public Node getNode(int nodeHandle)
    605   {
    606 
    607     int identity = makeNodeIdentity(nodeHandle);
    608 
    609     return (Node) m_nodes.elementAt(identity);
    610   }
    611 
    612   /**
    613    * Get a Node from an identity index.
    614    *
    615    * NEEDSDOC @param nodeIdentity
    616    *
    617    * NEEDSDOC ($objectName$) @return
    618    */
    619   protected Node lookupNode(int nodeIdentity)
    620   {
    621     return (Node) m_nodes.elementAt(nodeIdentity);
    622   }
    623 
    624   /**
    625    * Get the next node identity value in the list, and call the iterator
    626    * if it hasn't been added yet.
    627    *
    628    * @param identity The node identity (index).
    629    * @return identity+1, or DTM.NULL.
    630    */
    631   protected int getNextNodeIdentity(int identity)
    632   {
    633 
    634     identity += 1;
    635 
    636     if (identity >= m_nodes.size())
    637     {
    638       if (!nextNode())
    639         identity = DTM.NULL;
    640     }
    641 
    642     return identity;
    643   }
    644 
    645   /**
    646    * Get the handle from a Node.
    647    * <p>%OPT% This will be pretty slow.</p>
    648    *
    649    * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path;
    650    * walk down DTM reconstructing path) might be considerably faster
    651    * on later nodes in large documents. That might also imply improving
    652    * this call to handle nodes which would be in this DTM but
    653    * have not yet been built, which might or might not be a Good Thing.</p>
    654    *
    655    * %REVIEW% This relies on being able to test node-identity via
    656    * object-identity. DTM2DOM proxying is a great example of a case where
    657    * that doesn't work. DOM Level 3 will provide the isSameNode() method
    658    * to fix that, but until then this is going to be flaky.
    659    *
    660    * @param node A node, which may be null.
    661    *
    662    * @return The node handle or <code>DTM.NULL</code>.
    663    */
    664   private int getHandleFromNode(Node node)
    665   {
    666     if (null != node)
    667     {
    668       int len = m_nodes.size();
    669       boolean isMore;
    670       int i = 0;
    671       do
    672       {
    673         for (; i < len; i++)
    674         {
    675           if (m_nodes.elementAt(i) == node)
    676             return makeNodeHandle(i);
    677         }
    678 
    679         isMore = nextNode();
    680 
    681         len = m_nodes.size();
    682 
    683       }
    684       while(isMore || i < len);
    685     }
    686 
    687     return DTM.NULL;
    688   }
    689 
    690   /** Get the handle from a Node. This is a more robust version of
    691    * getHandleFromNode, intended to be usable by the public.
    692    *
    693    * <p>%OPT% This will be pretty slow.</p>
    694    *
    695    * %REVIEW% This relies on being able to test node-identity via
    696    * object-identity. DTM2DOM proxying is a great example of a case where
    697    * that doesn't work. DOM Level 3 will provide the isSameNode() method
    698    * to fix that, but until then this is going to be flaky.
    699    *
    700    * @param node A node, which may be null.
    701    *
    702    * @return The node handle or <code>DTM.NULL</code>.  */
    703   public int getHandleOfNode(Node node)
    704   {
    705     if (null != node)
    706     {
    707       // Is Node actually within the same document? If not, don't search!
    708       // This would be easier if m_root was always the Document node, but
    709       // we decided to allow wrapping a DTM around a subtree.
    710       if((m_root==node) ||
    711          (m_root.getNodeType()==DOCUMENT_NODE &&
    712           m_root==node.getOwnerDocument()) ||
    713          (m_root.getNodeType()!=DOCUMENT_NODE &&
    714           m_root.getOwnerDocument()==node.getOwnerDocument())
    715          )
    716         {
    717           // If node _is_ in m_root's tree, find its handle
    718           //
    719           // %OPT% This check may be improved significantly when DOM
    720           // Level 3 nodeKey and relative-order tests become
    721           // available!
    722           for(Node cursor=node;
    723               cursor!=null;
    724               cursor=
    725                 (cursor.getNodeType()!=ATTRIBUTE_NODE)
    726                 ? cursor.getParentNode()
    727                 : ((org.w3c.dom.Attr)cursor).getOwnerElement())
    728             {
    729               if(cursor==m_root)
    730                 // We know this node; find its handle.
    731                 return getHandleFromNode(node);
    732             } // for ancestors of node
    733         } // if node and m_root in same Document
    734     } // if node!=null
    735 
    736     return DTM.NULL;
    737   }
    738 
    739   /**
    740    * Retrieves an attribute node by by qualified name and namespace URI.
    741    *
    742    * @param nodeHandle int Handle of the node upon which to look up this attribute..
    743    * @param namespaceURI The namespace URI of the attribute to
    744    *   retrieve, or null.
    745    * @param name The local name of the attribute to
    746    *   retrieve.
    747    * @return The attribute node handle with the specified name (
    748    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
    749    *   attribute.
    750    */
    751   public int getAttributeNode(int nodeHandle, String namespaceURI,
    752                               String name)
    753   {
    754 
    755     // %OPT% This is probably slower than it needs to be.
    756     if (null == namespaceURI)
    757       namespaceURI = "";
    758 
    759     int type = getNodeType(nodeHandle);
    760 
    761     if (DTM.ELEMENT_NODE == type)
    762     {
    763 
    764       // Assume that attributes immediately follow the element.
    765       int identity = makeNodeIdentity(nodeHandle);
    766 
    767       while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
    768       {
    769         // Assume this can not be null.
    770         type = _type(identity);
    771 
    772 				// %REVIEW%
    773 				// Should namespace nodes be retrievable DOM-style as attrs?
    774 				// If not we need a separate function... which may be desirable
    775 				// architecturally, but which is ugly from a code point of view.
    776 				// (If we REALLY insist on it, this code should become a subroutine
    777 				// of both -- retrieve the node, then test if the type matches
    778 				// what you're looking for.)
    779         if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE)
    780         {
    781           Node node = lookupNode(identity);
    782           String nodeuri = node.getNamespaceURI();
    783 
    784           if (null == nodeuri)
    785             nodeuri = "";
    786 
    787           String nodelocalname = node.getLocalName();
    788 
    789           if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname))
    790             return makeNodeHandle(identity);
    791         }
    792 
    793         else // if (DTM.NAMESPACE_NODE != type)
    794         {
    795           break;
    796         }
    797       }
    798     }
    799 
    800     return DTM.NULL;
    801   }
    802 
    803   /**
    804    * Get the string-value of a node as a String object
    805    * (see http://www.w3.org/TR/xpath#data-model
    806    * for the definition of a node's string-value).
    807    *
    808    * @param nodeHandle The node ID.
    809    *
    810    * @return A string object that represents the string-value of the given node.
    811    */
    812   public XMLString getStringValue(int nodeHandle)
    813   {
    814 
    815     int type = getNodeType(nodeHandle);
    816     Node node = getNode(nodeHandle);
    817     // %TBD% If an element only has one text node, we should just use it
    818     // directly.
    819     if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type
    820     || DTM.DOCUMENT_FRAGMENT_NODE == type)
    821     {
    822       FastStringBuffer buf = StringBufferPool.get();
    823       String s;
    824 
    825       try
    826       {
    827         getNodeData(node, buf);
    828 
    829         s = (buf.length() > 0) ? buf.toString() : "";
    830       }
    831       finally
    832       {
    833         StringBufferPool.free(buf);
    834       }
    835 
    836       return m_xstrf.newstr( s );
    837     }
    838     else if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
    839     {
    840       // If this is a DTM text node, it may be made of multiple DOM text
    841       // nodes -- including navigating into Entity References. DOM2DTM
    842       // records the first node in the sequence and requires that we
    843       // pick up the others when we retrieve the DTM node's value.
    844       //
    845       // %REVIEW% DOM Level 3 is expected to add a "whole text"
    846       // retrieval method which performs this function for us.
    847       FastStringBuffer buf = StringBufferPool.get();
    848       while(node!=null)
    849       {
    850         buf.append(node.getNodeValue());
    851         node=logicalNextDOMTextNode(node);
    852       }
    853       String s=(buf.length() > 0) ? buf.toString() : "";
    854       StringBufferPool.free(buf);
    855       return m_xstrf.newstr( s );
    856     }
    857     else
    858       return m_xstrf.newstr( node.getNodeValue() );
    859   }
    860 
    861   /**
    862    * Determine if the string-value of a node is whitespace
    863    *
    864    * @param nodeHandle The node Handle.
    865    *
    866    * @return Return true if the given node is whitespace.
    867    */
    868   public boolean isWhitespace(int nodeHandle)
    869   {
    870   	int type = getNodeType(nodeHandle);
    871     Node node = getNode(nodeHandle);
    872   	if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
    873     {
    874       // If this is a DTM text node, it may be made of multiple DOM text
    875       // nodes -- including navigating into Entity References. DOM2DTM
    876       // records the first node in the sequence and requires that we
    877       // pick up the others when we retrieve the DTM node's value.
    878       //
    879       // %REVIEW% DOM Level 3 is expected to add a "whole text"
    880       // retrieval method which performs this function for us.
    881       FastStringBuffer buf = StringBufferPool.get();
    882       while(node!=null)
    883       {
    884         buf.append(node.getNodeValue());
    885         node=logicalNextDOMTextNode(node);
    886       }
    887      boolean b = buf.isWhitespace(0, buf.length());
    888       StringBufferPool.free(buf);
    889      return b;
    890     }
    891     return false;
    892   }
    893 
    894   /**
    895    * Retrieve the text content of a DOM subtree, appending it into a
    896    * user-supplied FastStringBuffer object. Note that attributes are
    897    * not considered part of the content of an element.
    898    * <p>
    899    * There are open questions regarding whitespace stripping.
    900    * Currently we make no special effort in that regard, since the standard
    901    * DOM doesn't yet provide DTD-based information to distinguish
    902    * whitespace-in-element-context from genuine #PCDATA. Note that we
    903    * should probably also consider xml:space if/when we address this.
    904    * DOM Level 3 may solve the problem for us.
    905    * <p>
    906    * %REVIEW% Actually, since this method operates on the DOM side of the
    907    * fence rather than the DTM side, it SHOULDN'T do
    908    * any special handling. The DOM does what the DOM does; if you want
    909    * DTM-level abstractions, use DTM-level methods.
    910    *
    911    * @param node Node whose subtree is to be walked, gathering the
    912    * contents of all Text or CDATASection nodes.
    913    * @param buf FastStringBuffer into which the contents of the text
    914    * nodes are to be concatenated.
    915    */
    916   protected static void getNodeData(Node node, FastStringBuffer buf)
    917   {
    918 
    919     switch (node.getNodeType())
    920     {
    921     case Node.DOCUMENT_FRAGMENT_NODE :
    922     case Node.DOCUMENT_NODE :
    923     case Node.ELEMENT_NODE :
    924     {
    925       for (Node child = node.getFirstChild(); null != child;
    926               child = child.getNextSibling())
    927       {
    928         getNodeData(child, buf);
    929       }
    930     }
    931     break;
    932     case Node.TEXT_NODE :
    933     case Node.CDATA_SECTION_NODE :
    934     case Node.ATTRIBUTE_NODE :	// Never a child but might be our starting node
    935       buf.append(node.getNodeValue());
    936       break;
    937     case Node.PROCESSING_INSTRUCTION_NODE :
    938       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
    939       break;
    940     default :
    941       // ignore
    942       break;
    943     }
    944   }
    945 
    946   /**
    947    * Given a node handle, return its DOM-style node name. This will
    948    * include names such as #text or #document.
    949    *
    950    * @param nodeHandle the id of the node.
    951    * @return String Name of this node, which may be an empty string.
    952    * %REVIEW% Document when empty string is possible...
    953    * %REVIEW-COMMENT% It should never be empty, should it?
    954    */
    955   public String getNodeName(int nodeHandle)
    956   {
    957 
    958     Node node = getNode(nodeHandle);
    959 
    960     // Assume non-null.
    961     return node.getNodeName();
    962   }
    963 
    964   /**
    965    * Given a node handle, return the XPath node name.  This should be
    966    * the name as described by the XPath data model, NOT the DOM-style
    967    * name.
    968    *
    969    * @param nodeHandle the id of the node.
    970    * @return String Name of this node, which may be an empty string.
    971    */
    972   public String getNodeNameX(int nodeHandle)
    973   {
    974 
    975     String name;
    976     short type = getNodeType(nodeHandle);
    977 
    978     switch (type)
    979     {
    980     case DTM.NAMESPACE_NODE :
    981     {
    982       Node node = getNode(nodeHandle);
    983 
    984       // assume not null.
    985       name = node.getNodeName();
    986       if(name.startsWith("xmlns:"))
    987       {
    988         name = QName.getLocalPart(name);
    989       }
    990       else if(name.equals("xmlns"))
    991       {
    992         name = "";
    993       }
    994     }
    995     break;
    996     case DTM.ATTRIBUTE_NODE :
    997     case DTM.ELEMENT_NODE :
    998     case DTM.ENTITY_REFERENCE_NODE :
    999     case DTM.PROCESSING_INSTRUCTION_NODE :
   1000     {
   1001       Node node = getNode(nodeHandle);
   1002 
   1003       // assume not null.
   1004       name = node.getNodeName();
   1005     }
   1006     break;
   1007     default :
   1008       name = "";
   1009     }
   1010 
   1011     return name;
   1012   }
   1013 
   1014   /**
   1015    * Given a node handle, return its XPath-style localname.
   1016    * (As defined in Namespaces, this is the portion of the name after any
   1017    * colon character).
   1018    *
   1019    * @param nodeHandle the id of the node.
   1020    * @return String Local name of this node.
   1021    */
   1022   public String getLocalName(int nodeHandle)
   1023   {
   1024     if(JJK_NEWCODE)
   1025     {
   1026       int id=makeNodeIdentity(nodeHandle);
   1027       if(NULL==id) return null;
   1028       Node newnode=(Node)m_nodes.elementAt(id);
   1029       String newname=newnode.getLocalName();
   1030       if (null == newname)
   1031       {
   1032 	// XSLT treats PIs, and possibly other things, as having QNames.
   1033 	String qname = newnode.getNodeName();
   1034 	if('#'==qname.charAt(0))
   1035 	{
   1036 	  //  Match old default for this function
   1037 	  // This conversion may or may not be necessary
   1038 	  newname="";
   1039 	}
   1040 	else
   1041 	{
   1042 	  int index = qname.indexOf(':');
   1043 	  newname = (index < 0) ? qname : qname.substring(index + 1);
   1044 	}
   1045       }
   1046       return newname;
   1047     }
   1048     else
   1049     {
   1050       String name;
   1051       short type = getNodeType(nodeHandle);
   1052       switch (type)
   1053       {
   1054       case DTM.ATTRIBUTE_NODE :
   1055       case DTM.ELEMENT_NODE :
   1056       case DTM.ENTITY_REFERENCE_NODE :
   1057       case DTM.NAMESPACE_NODE :
   1058       case DTM.PROCESSING_INSTRUCTION_NODE :
   1059 	{
   1060 	  Node node = getNode(nodeHandle);
   1061 
   1062 	  // assume not null.
   1063 	  name = node.getLocalName();
   1064 
   1065 	  if (null == name)
   1066 	  {
   1067 	    String qname = node.getNodeName();
   1068 	    int index = qname.indexOf(':');
   1069 
   1070 	    name = (index < 0) ? qname : qname.substring(index + 1);
   1071 	  }
   1072 	}
   1073 	break;
   1074       default :
   1075 	name = "";
   1076       }
   1077       return name;
   1078     }
   1079   }
   1080 
   1081   /**
   1082    * Given a namespace handle, return the prefix that the namespace decl is
   1083    * mapping.
   1084    * Given a node handle, return the prefix used to map to the namespace.
   1085    *
   1086    * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
   1087    * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb  </p>
   1088    *
   1089    * @param nodeHandle the id of the node.
   1090    * @return String prefix of this node's name, or "" if no explicit
   1091    * namespace prefix was given.
   1092    */
   1093   public String getPrefix(int nodeHandle)
   1094   {
   1095 
   1096     String prefix;
   1097     short type = getNodeType(nodeHandle);
   1098 
   1099     switch (type)
   1100     {
   1101     case DTM.NAMESPACE_NODE :
   1102     {
   1103       Node node = getNode(nodeHandle);
   1104 
   1105       // assume not null.
   1106       String qname = node.getNodeName();
   1107       int index = qname.indexOf(':');
   1108 
   1109       prefix = (index < 0) ? "" : qname.substring(index + 1);
   1110     }
   1111     break;
   1112     case DTM.ATTRIBUTE_NODE :
   1113     case DTM.ELEMENT_NODE :
   1114     {
   1115       Node node = getNode(nodeHandle);
   1116 
   1117       // assume not null.
   1118       String qname = node.getNodeName();
   1119       int index = qname.indexOf(':');
   1120 
   1121       prefix = (index < 0) ? "" : qname.substring(0, index);
   1122     }
   1123     break;
   1124     default :
   1125       prefix = "";
   1126     }
   1127 
   1128     return prefix;
   1129   }
   1130 
   1131   /**
   1132    * Given a node handle, return its DOM-style namespace URI
   1133    * (As defined in Namespaces, this is the declared URI which this node's
   1134    * prefix -- or default in lieu thereof -- was mapped to.)
   1135    *
   1136    * <p>%REVIEW% Null or ""? -sb</p>
   1137    *
   1138    * @param nodeHandle the id of the node.
   1139    * @return String URI value of this node's namespace, or null if no
   1140    * namespace was resolved.
   1141    */
   1142   public String getNamespaceURI(int nodeHandle)
   1143   {
   1144     if(JJK_NEWCODE)
   1145     {
   1146       int id=makeNodeIdentity(nodeHandle);
   1147       if(id==NULL) return null;
   1148       Node node=(Node)m_nodes.elementAt(id);
   1149       return node.getNamespaceURI();
   1150     }
   1151     else
   1152     {
   1153       String nsuri;
   1154       short type = getNodeType(nodeHandle);
   1155 
   1156       switch (type)
   1157       {
   1158       case DTM.ATTRIBUTE_NODE :
   1159       case DTM.ELEMENT_NODE :
   1160       case DTM.ENTITY_REFERENCE_NODE :
   1161       case DTM.NAMESPACE_NODE :
   1162       case DTM.PROCESSING_INSTRUCTION_NODE :
   1163 	{
   1164 	  Node node = getNode(nodeHandle);
   1165 
   1166 	  // assume not null.
   1167 	  nsuri = node.getNamespaceURI();
   1168 
   1169 	  // %TBD% Handle DOM1?
   1170 	}
   1171 	break;
   1172       default :
   1173 	nsuri = null;
   1174       }
   1175 
   1176       return nsuri;
   1177     }
   1178 
   1179   }
   1180 
   1181   /** Utility function: Given a DOM Text node, determine whether it is
   1182    * logically followed by another Text or CDATASection node. This may
   1183    * involve traversing into Entity References.
   1184    *
   1185    * %REVIEW% DOM Level 3 is expected to add functionality which may
   1186    * allow us to retire this.
   1187    */
   1188   private Node logicalNextDOMTextNode(Node n)
   1189   {
   1190         Node p=n.getNextSibling();
   1191         if(p==null)
   1192         {
   1193                 // Walk out of any EntityReferenceNodes that ended with text
   1194                 for(n=n.getParentNode();
   1195                         n!=null && ENTITY_REFERENCE_NODE == n.getNodeType();
   1196                         n=n.getParentNode())
   1197                 {
   1198                         p=n.getNextSibling();
   1199                         if(p!=null)
   1200                                 break;
   1201                 }
   1202         }
   1203         n=p;
   1204         while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType())
   1205         {
   1206                 // Walk into any EntityReferenceNodes that start with text
   1207                 if(n.hasChildNodes())
   1208                         n=n.getFirstChild();
   1209                 else
   1210                         n=n.getNextSibling();
   1211         }
   1212         if(n!=null)
   1213         {
   1214                 // Found a logical next sibling. Is it text?
   1215                 int ntype=n.getNodeType();
   1216                 if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype)
   1217                         n=null;
   1218         }
   1219         return n;
   1220   }
   1221 
   1222   /**
   1223    * Given a node handle, return its node value. This is mostly
   1224    * as defined by the DOM, but may ignore some conveniences.
   1225    * <p>
   1226    *
   1227    * @param nodeHandle The node id.
   1228    * @return String Value of this node, or null if not
   1229    * meaningful for this node type.
   1230    */
   1231   public String getNodeValue(int nodeHandle)
   1232   {
   1233     // The _type(nodeHandle) call was taking the lion's share of our
   1234     // time, and was wrong anyway since it wasn't coverting handle to
   1235     // identity. Inlined it.
   1236     int type = _exptype(makeNodeIdentity(nodeHandle));
   1237     type=(NULL != type) ? getNodeType(nodeHandle) : NULL;
   1238 
   1239     if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type)
   1240       return getNode(nodeHandle).getNodeValue();
   1241 
   1242     // If this is a DTM text node, it may be made of multiple DOM text
   1243     // nodes -- including navigating into Entity References. DOM2DTM
   1244     // records the first node in the sequence and requires that we
   1245     // pick up the others when we retrieve the DTM node's value.
   1246     //
   1247     // %REVIEW% DOM Level 3 is expected to add a "whole text"
   1248     // retrieval method which performs this function for us.
   1249     Node node = getNode(nodeHandle);
   1250     Node n=logicalNextDOMTextNode(node);
   1251     if(n==null)
   1252       return node.getNodeValue();
   1253 
   1254     FastStringBuffer buf = StringBufferPool.get();
   1255         buf.append(node.getNodeValue());
   1256     while(n!=null)
   1257     {
   1258       buf.append(n.getNodeValue());
   1259       n=logicalNextDOMTextNode(n);
   1260     }
   1261     String s = (buf.length() > 0) ? buf.toString() : "";
   1262     StringBufferPool.free(buf);
   1263     return s;
   1264   }
   1265 
   1266   /**
   1267    *   A document type declaration information item has the following properties:
   1268    *
   1269    *     1. [system identifier] The system identifier of the external subset, if
   1270    *        it exists. Otherwise this property has no value.
   1271    *
   1272    * @return the system identifier String object, or null if there is none.
   1273    */
   1274   public String getDocumentTypeDeclarationSystemIdentifier()
   1275   {
   1276 
   1277     Document doc;
   1278 
   1279     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
   1280       doc = (Document) m_root;
   1281     else
   1282       doc = m_root.getOwnerDocument();
   1283 
   1284     if (null != doc)
   1285     {
   1286       DocumentType dtd = doc.getDoctype();
   1287 
   1288       if (null != dtd)
   1289       {
   1290         return dtd.getSystemId();
   1291       }
   1292     }
   1293 
   1294     return null;
   1295   }
   1296 
   1297   /**
   1298    * Return the public identifier of the external subset,
   1299    * normalized as described in 4.2.2 External Entities [XML]. If there is
   1300    * no external subset or if it has no public identifier, this property
   1301    * has no value.
   1302    *
   1303    * @return the public identifier String object, or null if there is none.
   1304    */
   1305   public String getDocumentTypeDeclarationPublicIdentifier()
   1306   {
   1307 
   1308     Document doc;
   1309 
   1310     if (m_root.getNodeType() == Node.DOCUMENT_NODE)
   1311       doc = (Document) m_root;
   1312     else
   1313       doc = m_root.getOwnerDocument();
   1314 
   1315     if (null != doc)
   1316     {
   1317       DocumentType dtd = doc.getDoctype();
   1318 
   1319       if (null != dtd)
   1320       {
   1321         return dtd.getPublicId();
   1322       }
   1323     }
   1324 
   1325     return null;
   1326   }
   1327 
   1328   /**
   1329    * Returns the <code>Element</code> whose <code>ID</code> is given by
   1330    * <code>elementId</code>. If no such element exists, returns
   1331    * <code>DTM.NULL</code>. Behavior is not defined if more than one element
   1332    * has this <code>ID</code>. Attributes (including those
   1333    * with the name "ID") are not of type ID unless so defined by DTD/Schema
   1334    * information available to the DTM implementation.
   1335    * Implementations that do not know whether attributes are of type ID or
   1336    * not are expected to return <code>DTM.NULL</code>.
   1337    *
   1338    * <p>%REVIEW% Presumably IDs are still scoped to a single document,
   1339    * and this operation searches only within a single document, right?
   1340    * Wouldn't want collisions between DTMs in the same process.</p>
   1341    *
   1342    * @param elementId The unique <code>id</code> value for an element.
   1343    * @return The handle of the matching element.
   1344    */
   1345   public int getElementById(String elementId)
   1346   {
   1347 
   1348     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
   1349         ? (Document) m_root : m_root.getOwnerDocument();
   1350 
   1351     if(null != doc)
   1352     {
   1353       Node elem = doc.getElementById(elementId);
   1354       if(null != elem)
   1355       {
   1356         int elemHandle = getHandleFromNode(elem);
   1357 
   1358         if(DTM.NULL == elemHandle)
   1359         {
   1360           int identity = m_nodes.size()-1;
   1361           while (DTM.NULL != (identity = getNextNodeIdentity(identity)))
   1362           {
   1363             Node node = getNode(identity);
   1364             if(node == elem)
   1365             {
   1366               elemHandle = getHandleFromNode(elem);
   1367               break;
   1368             }
   1369            }
   1370         }
   1371 
   1372         return elemHandle;
   1373       }
   1374 
   1375     }
   1376     return DTM.NULL;
   1377   }
   1378 
   1379   /**
   1380    * The getUnparsedEntityURI function returns the URI of the unparsed
   1381    * entity with the specified name in the same document as the context
   1382    * node (see [3.3 Unparsed Entities]). It returns the empty string if
   1383    * there is no such entity.
   1384    * <p>
   1385    * XML processors may choose to use the System Identifier (if one
   1386    * is provided) to resolve the entity, rather than the URI in the
   1387    * Public Identifier. The details are dependent on the processor, and
   1388    * we would have to support some form of plug-in resolver to handle
   1389    * this properly. Currently, we simply return the System Identifier if
   1390    * present, and hope that it a usable URI or that our caller can
   1391    * map it to one.
   1392    * TODO: Resolve Public Identifiers... or consider changing function name.
   1393    * <p>
   1394    * If we find a relative URI
   1395    * reference, XML expects it to be resolved in terms of the base URI
   1396    * of the document. The DOM doesn't do that for us, and it isn't
   1397    * entirely clear whether that should be done here; currently that's
   1398    * pushed up to a higher level of our application. (Note that DOM Level
   1399    * 1 didn't store the document's base URI.)
   1400    * TODO: Consider resolving Relative URIs.
   1401    * <p>
   1402    * (The DOM's statement that "An XML processor may choose to
   1403    * completely expand entities before the structure model is passed
   1404    * to the DOM" refers only to parsed entities, not unparsed, and hence
   1405    * doesn't affect this function.)
   1406    *
   1407    * @param name A string containing the Entity Name of the unparsed
   1408    * entity.
   1409    *
   1410    * @return String containing the URI of the Unparsed Entity, or an
   1411    * empty string if no such entity exists.
   1412    */
   1413   public String getUnparsedEntityURI(String name)
   1414   {
   1415 
   1416     String url = "";
   1417     Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE)
   1418         ? (Document) m_root : m_root.getOwnerDocument();
   1419 
   1420     if (null != doc)
   1421     {
   1422       DocumentType doctype = doc.getDoctype();
   1423 
   1424       if (null != doctype)
   1425       {
   1426         NamedNodeMap entities = doctype.getEntities();
   1427         if(null == entities)
   1428           return url;
   1429         Entity entity = (Entity) entities.getNamedItem(name);
   1430         if(null == entity)
   1431           return url;
   1432 
   1433         String notationName = entity.getNotationName();
   1434 
   1435         if (null != notationName)  // then it's unparsed
   1436         {
   1437           // The draft says: "The XSLT processor may use the public
   1438           // identifier to generate a URI for the entity instead of the URI
   1439           // specified in the system identifier. If the XSLT processor does
   1440           // not use the public identifier to generate the URI, it must use
   1441           // the system identifier; if the system identifier is a relative
   1442           // URI, it must be resolved into an absolute URI using the URI of
   1443           // the resource containing the entity declaration as the base
   1444           // URI [RFC2396]."
   1445           // So I'm falling a bit short here.
   1446           url = entity.getSystemId();
   1447 
   1448           if (null == url)
   1449           {
   1450             url = entity.getPublicId();
   1451           }
   1452           else
   1453           {
   1454             // This should be resolved to an absolute URL, but that's hard
   1455             // to do from here.
   1456           }
   1457         }
   1458       }
   1459     }
   1460 
   1461     return url;
   1462   }
   1463 
   1464   /**
   1465    *     5. [specified] A flag indicating whether this attribute was actually
   1466    *        specified in the start-tag of its element, or was defaulted from the
   1467    *        DTD.
   1468    *
   1469    * @param attributeHandle the attribute handle
   1470    * @return <code>true</code> if the attribute was specified;
   1471    *         <code>false</code> if it was defaulted.
   1472    */
   1473   public boolean isAttributeSpecified(int attributeHandle)
   1474   {
   1475     int type = getNodeType(attributeHandle);
   1476 
   1477     if (DTM.ATTRIBUTE_NODE == type)
   1478     {
   1479       Attr attr = (Attr)getNode(attributeHandle);
   1480       return attr.getSpecified();
   1481     }
   1482     return false;
   1483   }
   1484 
   1485   /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since
   1486    * we're wrapped around an existing DOM.
   1487    *
   1488    * @param source The IncrementalSAXSource that we want to recieve events from
   1489    * on demand.
   1490    */
   1491   public void setIncrementalSAXSource(IncrementalSAXSource source)
   1492   {
   1493   }
   1494 
   1495   /** getContentHandler returns "our SAX builder" -- the thing that
   1496    * someone else should send SAX events to in order to extend this
   1497    * DTM model.
   1498    *
   1499    * @return null if this model doesn't respond to SAX events,
   1500    * "this" if the DTM object has a built-in SAX ContentHandler,
   1501    * the IncrmentalSAXSource if we're bound to one and should receive
   1502    * the SAX stream via it for incremental build purposes...
   1503    * */
   1504   public org.xml.sax.ContentHandler getContentHandler()
   1505   {
   1506       return null;
   1507   }
   1508 
   1509   /**
   1510    * Return this DTM's lexical handler.
   1511    *
   1512    * %REVIEW% Should this return null if constrution already done/begun?
   1513    *
   1514    * @return null if this model doesn't respond to lexical SAX events,
   1515    * "this" if the DTM object has a built-in SAX ContentHandler,
   1516    * the IncrementalSAXSource if we're bound to one and should receive
   1517    * the SAX stream via it for incremental build purposes...
   1518    */
   1519   public org.xml.sax.ext.LexicalHandler getLexicalHandler()
   1520   {
   1521 
   1522     return null;
   1523   }
   1524 
   1525 
   1526   /**
   1527    * Return this DTM's EntityResolver.
   1528    *
   1529    * @return null if this model doesn't respond to SAX entity ref events.
   1530    */
   1531   public org.xml.sax.EntityResolver getEntityResolver()
   1532   {
   1533 
   1534     return null;
   1535   }
   1536 
   1537   /**
   1538    * Return this DTM's DTDHandler.
   1539    *
   1540    * @return null if this model doesn't respond to SAX dtd events.
   1541    */
   1542   public org.xml.sax.DTDHandler getDTDHandler()
   1543   {
   1544 
   1545     return null;
   1546   }
   1547 
   1548   /**
   1549    * Return this DTM's ErrorHandler.
   1550    *
   1551    * @return null if this model doesn't respond to SAX error events.
   1552    */
   1553   public org.xml.sax.ErrorHandler getErrorHandler()
   1554   {
   1555 
   1556     return null;
   1557   }
   1558 
   1559   /**
   1560    * Return this DTM's DeclHandler.
   1561    *
   1562    * @return null if this model doesn't respond to SAX Decl events.
   1563    */
   1564   public org.xml.sax.ext.DeclHandler getDeclHandler()
   1565   {
   1566 
   1567     return null;
   1568   }
   1569 
   1570   /** @return true iff we're building this model incrementally (eg
   1571    * we're partnered with a IncrementalSAXSource) and thus require that the
   1572    * transformation and the parse run simultaneously. Guidance to the
   1573    * DTMManager.
   1574    * */
   1575   public boolean needsTwoThreads()
   1576   {
   1577     return false;
   1578   }
   1579 
   1580   // ========== Direct SAX Dispatch, for optimization purposes ========
   1581 
   1582   /**
   1583    * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition
   1584    * of whitespace.  Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S">
   1585    * the definition of <CODE>S</CODE></A> for details.
   1586    * @param   ch      Character to check as XML whitespace.
   1587    * @return          =true if <var>ch</var> is XML whitespace; otherwise =false.
   1588    */
   1589   private static boolean isSpace(char ch)
   1590   {
   1591     return XMLCharacterRecognizer.isWhiteSpace(ch);  // Take the easy way out for now.
   1592   }
   1593 
   1594   /**
   1595    * Directly call the
   1596    * characters method on the passed ContentHandler for the
   1597    * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
   1598    * for the definition of a node's string-value). Multiple calls to the
   1599    * ContentHandler's characters methods may well occur for a single call to
   1600    * this method.
   1601    *
   1602    * @param nodeHandle The node ID.
   1603    * @param ch A non-null reference to a ContentHandler.
   1604    *
   1605    * @throws org.xml.sax.SAXException
   1606    */
   1607   public void dispatchCharactersEvents(
   1608           int nodeHandle, org.xml.sax.ContentHandler ch,
   1609           boolean normalize)
   1610             throws org.xml.sax.SAXException
   1611   {
   1612     if(normalize)
   1613     {
   1614       XMLString str = getStringValue(nodeHandle);
   1615       str = str.fixWhiteSpace(true, true, false);
   1616       str.dispatchCharactersEvents(ch);
   1617     }
   1618     else
   1619     {
   1620       int type = getNodeType(nodeHandle);
   1621       Node node = getNode(nodeHandle);
   1622       dispatchNodeData(node, ch, 0);
   1623           // Text coalition -- a DTM text node may represent multiple
   1624           // DOM nodes.
   1625           if(TEXT_NODE == type || CDATA_SECTION_NODE == type)
   1626           {
   1627                   while( null != (node=logicalNextDOMTextNode(node)) )
   1628                   {
   1629                       dispatchNodeData(node, ch, 0);
   1630                   }
   1631           }
   1632     }
   1633   }
   1634 
   1635   /**
   1636    * Retrieve the text content of a DOM subtree, appending it into a
   1637    * user-supplied FastStringBuffer object. Note that attributes are
   1638    * not considered part of the content of an element.
   1639    * <p>
   1640    * There are open questions regarding whitespace stripping.
   1641    * Currently we make no special effort in that regard, since the standard
   1642    * DOM doesn't yet provide DTD-based information to distinguish
   1643    * whitespace-in-element-context from genuine #PCDATA. Note that we
   1644    * should probably also consider xml:space if/when we address this.
   1645    * DOM Level 3 may solve the problem for us.
   1646    * <p>
   1647    * %REVIEW% Note that as a DOM-level operation, it can be argued that this
   1648    * routine _shouldn't_ perform any processing beyond what the DOM already
   1649    * does, and that whitespace stripping and so on belong at the DTM level.
   1650    * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM.
   1651    *
   1652    * @param node Node whose subtree is to be walked, gathering the
   1653    * contents of all Text or CDATASection nodes.
   1654    */
   1655   protected static void dispatchNodeData(Node node,
   1656                                          org.xml.sax.ContentHandler ch,
   1657                                          int depth)
   1658             throws org.xml.sax.SAXException
   1659   {
   1660 
   1661     switch (node.getNodeType())
   1662     {
   1663     case Node.DOCUMENT_FRAGMENT_NODE :
   1664     case Node.DOCUMENT_NODE :
   1665     case Node.ELEMENT_NODE :
   1666     {
   1667       for (Node child = node.getFirstChild(); null != child;
   1668               child = child.getNextSibling())
   1669       {
   1670         dispatchNodeData(child, ch, depth+1);
   1671       }
   1672     }
   1673     break;
   1674     case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW%
   1675     case Node.COMMENT_NODE :
   1676       if(0 != depth)
   1677         break;
   1678         // NOTE: Because this operation works in the DOM space, it does _not_ attempt
   1679         // to perform Text Coalition. That should only be done in DTM space.
   1680     case Node.TEXT_NODE :
   1681     case Node.CDATA_SECTION_NODE :
   1682     case Node.ATTRIBUTE_NODE :
   1683       String str = node.getNodeValue();
   1684       if(ch instanceof CharacterNodeHandler)
   1685       {
   1686         ((CharacterNodeHandler)ch).characters(node);
   1687       }
   1688       else
   1689       {
   1690         ch.characters(str.toCharArray(), 0, str.length());
   1691       }
   1692       break;
   1693 //    /* case Node.PROCESSING_INSTRUCTION_NODE :
   1694 //      // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
   1695 //      break; */
   1696     default :
   1697       // ignore
   1698       break;
   1699     }
   1700   }
   1701 
   1702   TreeWalker m_walker = new TreeWalker(null);
   1703 
   1704   /**
   1705    * Directly create SAX parser events from a subtree.
   1706    *
   1707    * @param nodeHandle The node ID.
   1708    * @param ch A non-null reference to a ContentHandler.
   1709    *
   1710    * @throws org.xml.sax.SAXException
   1711    */
   1712   public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
   1713           throws org.xml.sax.SAXException
   1714   {
   1715     TreeWalker treeWalker = m_walker;
   1716     ContentHandler prevCH = treeWalker.getContentHandler();
   1717 
   1718     if(null != prevCH)
   1719     {
   1720       treeWalker = new TreeWalker(null);
   1721     }
   1722     treeWalker.setContentHandler(ch);
   1723 
   1724     try
   1725     {
   1726       Node node = getNode(nodeHandle);
   1727       treeWalker.traverseFragment(node);
   1728     }
   1729     finally
   1730     {
   1731       treeWalker.setContentHandler(null);
   1732     }
   1733   }
   1734 
   /**
    * Optional extension for ContentHandlers passed to dispatchNodeData:
    * when the handler implements this interface, dispatchNodeData hands it
    * the DOM node itself instead of calling the char-array based
    * <code>characters</code> method.
    */
   public interface CharacterNodeHandler
   {
     /**
      * Receives a DOM node whose character data is being dispatched.
      *
      * @param node the DOM node being dispatched.
      *
      * @throws org.xml.sax.SAXException if the implementation reports an error.
      */
     void characters(Node node) throws org.xml.sax.SAXException;
   }
   1740 
   1741   /**
   1742    * For the moment all the run time properties are ignored by this
   1743    * class.
   1744    *
   1745    * @param property a <code>String</code> value
   1746    * @param value an <code>Object</code> value
   1747    */
   1748   public void setProperty(String property, Object value)
   1749   {
   1750   }
   1751 
   1752   /**
   1753    * No source information is available for DOM2DTM, so return
   1754    * <code>null</code> here.
   1755    *
   1756    * @param node an <code>int</code> value
   1757    * @return null
   1758    */
   1759   public SourceLocator getSourceLocatorFor(int node)
   1760   {
   1761     return null;
   1762   }
   1763 
   1764 }
   1765 
   1766 
   1767