Home | History | Annotate | Download | only in sax2dtm
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
     20  */
     21 package org.apache.xml.dtm.ref.sax2dtm;
     22 
     23 import javax.xml.transform.Source;
     24 
     25 import org.apache.xml.dtm.DTM;
     26 import org.apache.xml.dtm.DTMManager;
     27 import org.apache.xml.dtm.DTMWSFilter;
     28 import org.apache.xml.utils.IntStack;
     29 import org.apache.xml.utils.IntVector;
     30 import org.apache.xml.utils.StringVector;
     31 import org.apache.xml.utils.XMLStringFactory;
     32 
     33 import org.xml.sax.SAXException;
     34 
     35 /**
     36  * This is a subclass of SAX2DTM which has been modified to meet the needs of
     37  * Result Tree Frameworks (RTFs). The differences are:
     38  *
     39  * 1) Multiple XML trees may be appended to the single DTM. This means
     40  * that the root node of each document is _not_ node 0. Some code has
     41  * had to be deoptimized to support this mode of operation, and an
     42  * explicit mechanism for obtaining the Node Handle of the root node
     43  * has been provided.
     44  *
     45  * 2) A stack of these documents is maintained, allowing us to "tail-prune" the
     46  * most recently added trees off the end of the DTM as stylesheet elements
     47  * (and thus variable contexts) are exited.
     48  *
     49  * PLEASE NOTE that this class may be _heavily_ dependent upon the
     50  * internals of the SAX2DTM superclass, and must be maintained in
     51  * parallel with that code.  Arguably, they should be conditionals
     52  * within a single class... but they have deen separated for
     53  * performance reasons. (In fact, one could even argue about which is
     54  * the superclass and which is the subclass; the current arrangement
     55  * is as much about preserving stability of existing code during
     56  * development as anything else.)
     57  *
     58  * %REVIEW% In fact, since the differences are so minor, I think it
     59  * may be possible/practical to fold them back into the base
     60  * SAX2DTM. Consider that as a future code-size optimization.
     61  * */
     62 public class SAX2RTFDTM extends SAX2DTM
     63 {
     64   /** Set true to monitor SAX events and similar diagnostic info. */
     65   private static final boolean DEBUG = false;
     66 
     67   /** Most recently started Document, or null if the DTM is empty.  */
     68   private int m_currentDocumentNode=NULL;
     69 
     70   /** Tail-pruning mark: Number of nodes in use */
     71   IntStack mark_size=new IntStack();
     72   /** Tail-pruning mark: Number of data items in use */
     73   IntStack mark_data_size=new IntStack();
     74   /** Tail-pruning mark: Number of size-of-data fields in use */
     75   IntStack mark_char_size=new IntStack();
     76   /** Tail-pruning mark: Number of dataOrQName slots in use */
     77   IntStack mark_doq_size=new IntStack();
     78   /** Tail-pruning mark: Number of namespace declaration sets in use
     79    * %REVIEW% I don't think number of NS sets is ever different from number
     80    * of NS elements. We can probabably reduce these to a single stack and save
     81    * some storage.
     82    * */
     83   IntStack mark_nsdeclset_size=new IntStack();
     84   /** Tail-pruning mark: Number of naespace declaration elements in use
     85    * %REVIEW% I don't think number of NS sets is ever different from number
     86    * of NS elements. We can probabably reduce these to a single stack and save
     87    * some storage.
     88    */
     89   IntStack mark_nsdeclelem_size=new IntStack();
     90 
     91   /**
     92    * Tail-pruning mark:  initial number of nodes in use
     93    */
     94   int m_emptyNodeCount;
     95 
     96   /**
     97    * Tail-pruning mark:  initial number of namespace declaration sets
     98    */
     99   int m_emptyNSDeclSetCount;
    100 
    101   /**
    102    * Tail-pruning mark:  initial number of namespace declaration elements
    103    */
    104   int m_emptyNSDeclSetElemsCount;
    105 
    106   /**
    107    * Tail-pruning mark:  initial number of data items in use
    108    */
    109   int m_emptyDataCount;
    110 
    111   /**
    112    * Tail-pruning mark:  initial number of characters in use
    113    */
    114   int m_emptyCharsCount;
    115 
    116   /**
    117    * Tail-pruning mark:  default initial number of dataOrQName slots in use
    118    */
    119   int m_emptyDataQNCount;
    120 
    121   public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
    122                  DTMWSFilter whiteSpaceFilter,
    123                  XMLStringFactory xstringfactory,
    124                  boolean doIndexing)
    125   {
    126     super(mgr, source, dtmIdentity, whiteSpaceFilter,
    127           xstringfactory, doIndexing);
    128 
    129     // NEVER track source locators for RTFs; they aren't meaningful. I think.
    130     // (If we did track them, we'd need to tail-prune these too.)
    131     //org.apache.xalan.processor.TransformerFactoryImpl.m_source_location;
    132     m_useSourceLocationProperty=false;
    133     m_sourceSystemId = (m_useSourceLocationProperty) ? new StringVector()
    134                                                      : null;
    135     m_sourceLine = (m_useSourceLocationProperty) ? new IntVector() : null;
    136     m_sourceColumn = (m_useSourceLocationProperty) ? new IntVector() : null;
    137 
    138     // Record initial sizes of fields that are pushed and restored
    139     // for RTF tail-pruning.  More entries can be popped than pushed, so
    140     // we need this to mark the primordial state of the DTM.
    141     m_emptyNodeCount = m_size;
    142     m_emptyNSDeclSetCount = (m_namespaceDeclSets == null)
    143                                  ? 0 : m_namespaceDeclSets.size();
    144     m_emptyNSDeclSetElemsCount = (m_namespaceDeclSetElements == null)
    145                                       ? 0 : m_namespaceDeclSetElements.size();
    146     m_emptyDataCount = m_data.size();
    147     m_emptyCharsCount = m_chars.size();
    148     m_emptyDataQNCount = m_dataOrQName.size();
    149   }
    150 
    151   /**
    152    * Given a DTM, find the owning document node. In the case of
    153    * SAX2RTFDTM, which may contain multiple documents, this returns
    154    * the <b>most recently started</b> document, or null if the DTM is
    155    * empty or no document is currently under construction.
    156    *
    157    * %REVIEW% Should we continue to report the most recent after
    158    * construction has ended? I think not, given that it may have been
    159    * tail-pruned.
    160    *
    161    *  @return int Node handle of Document node, or null if this DTM does not
    162    *  contain an "active" document.
    163    * */
    164   public int getDocument()
    165   {
    166     return makeNodeHandle(m_currentDocumentNode);
    167   }
    168 
    169   /**
    170    * Given a node handle, find the owning document node, using DTM semantics
    171    * (Document owns itself) rather than DOM semantics (Document has no owner).
    172    *
    173    * (I'm counting on the fact that getOwnerDocument() is implemented on top
    174    * of this call, in the superclass, to avoid having to rewrite that one.
    175    * Be careful if that code changes!)
    176    *
    177    * @param nodeHandle the id of the node.
    178    * @return int Node handle of owning document
    179    */
    180   public int getDocumentRoot(int nodeHandle)
    181   {
    182     for (int id=makeNodeIdentity(nodeHandle); id!=NULL; id=_parent(id)) {
    183       if (_type(id)==DTM.DOCUMENT_NODE) {
    184         return makeNodeHandle(id);
    185       }
    186     }
    187 
    188     return DTM.NULL; // Safety net; should never happen
    189   }
    190 
    191   /**
    192    * Given a node identifier, find the owning document node.  Unlike the DOM,
    193    * this considers the owningDocument of a Document to be itself. Note that
    194    * in shared DTMs this may not be zero.
    195    *
    196    * @param nodeIdentifier the id of the starting node.
    197    * @return int Node identifier of the root of this DTM tree
    198    */
    199   protected int _documentRoot(int nodeIdentifier)
    200   {
    201     if(nodeIdentifier==NULL) return NULL;
    202 
    203     for (int parent=_parent(nodeIdentifier);
    204          parent!=NULL;
    205          nodeIdentifier=parent,parent=_parent(nodeIdentifier))
    206       ;
    207 
    208     return nodeIdentifier;
    209   }
    210 
    211   /**
    212    * Receive notification of the beginning of a new RTF document.
    213    *
    214    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
    215    * might want to consider folding the start/endDocument changes back
    216    * into the main SAX2DTM so we don't have to expose so many fields
    217    * (even as Protected) and carry the additional code.
    218    *
    219    * @throws SAXException Any SAX exception, possibly
    220    *            wrapping another exception.
    221    * @see org.xml.sax.ContentHandler#startDocument
    222    * */
    223   public void startDocument() throws SAXException
    224   {
    225     // Re-initialize the tree append process
    226     m_endDocumentOccured = false;
    227     m_prefixMappings = new java.util.Vector();
    228     m_contextIndexes = new IntStack();
    229     m_parents = new IntStack();
    230 
    231     m_currentDocumentNode=m_size;
    232     super.startDocument();
    233   }
    234 
    235   /**
    236    * Receive notification of the end of the document.
    237    *
    238    * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
    239    * might want to consider folding the start/endDocument changes back
    240    * into the main SAX2DTM so we don't have to expose so many fields
    241    * (even as Protected).
    242    *
    243    * @throws SAXException Any SAX exception, possibly
    244    *            wrapping another exception.
    245    * @see org.xml.sax.ContentHandler#endDocument
    246    * */
    247   public void endDocument() throws SAXException
    248   {
    249     charactersFlush();
    250 
    251     m_nextsib.setElementAt(NULL,m_currentDocumentNode);
    252 
    253     if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
    254       m_firstch.setElementAt(NULL,m_currentDocumentNode);
    255 
    256     if (DTM.NULL != m_previous)
    257       m_nextsib.setElementAt(DTM.NULL,m_previous);
    258 
    259     m_parents = null;
    260     m_prefixMappings = null;
    261     m_contextIndexes = null;
    262 
    263     m_currentDocumentNode= NULL; // no longer open
    264     m_endDocumentOccured = true;
    265   }
    266 
    267 
    268   /** "Tail-pruning" support for RTFs.
    269    *
    270    * This function pushes information about the current size of the
    271    * DTM's data structures onto a stack, for use by popRewindMark()
    272    * (which see).
    273    *
    274    * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
    275    * RTFs will not be indexed, so I can simply panic if that case
    276    * arises. Hey, it works...
    277    * */
    278   public void pushRewindMark()
    279   {
    280     if(m_indexing || m_elemIndexes!=null)
    281       throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
    282 
    283     // Values from DTMDefaultBase
    284     // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
    285     mark_size.push(m_size);
    286     mark_nsdeclset_size.push((m_namespaceDeclSets==null)
    287                                    ? 0
    288                                    : m_namespaceDeclSets.size());
    289     mark_nsdeclelem_size.push((m_namespaceDeclSetElements==null)
    290                                    ? 0
    291                                    : m_namespaceDeclSetElements.size());
    292 
    293     // Values from SAX2DTM
    294     mark_data_size.push(m_data.size());
    295     mark_char_size.push(m_chars.size());
    296     mark_doq_size.push(m_dataOrQName.size());
    297   }
    298 
    299   /** "Tail-pruning" support for RTFs.
    300    *
    301    * This function pops the information previously saved by
    302    * pushRewindMark (which see) and uses it to discard all nodes added
    303    * to the DTM after that time. We expect that this will allow us to
    304    * reuse storage more effectively.
    305    *
    306    * This is _not_ intended to be called while a document is still being
    307    * constructed -- only between endDocument and the next startDocument
    308    *
    309    * %REVIEW% WARNING: This is the first use of some of the truncation
    310    * methods.  If Xalan blows up after this is called, that's a likely
    311    * place to check.
    312    *
    313    * %REVIEW% Our original design for DTMs permitted them to share
    314    * string pools.  If there any risk that this might be happening, we
    315    * can _not_ rewind and recover the string storage. One solution
    316    * might to assert that DTMs used for RTFs Must Not take advantage
    317    * of that feature, but this seems excessively fragile. Another, much
    318    * less attractive, would be to just let them leak... Nah.
    319    *
    320    * @return true if and only if the pop completely emptied the
    321    * RTF. That response is used when determining how to unspool
    322    * RTF-started-while-RTF-open situations.
    323    * */
    324   public boolean popRewindMark()
    325   {
    326     boolean top=mark_size.empty();
    327 
    328     m_size=top ? m_emptyNodeCount : mark_size.pop();
    329     m_exptype.setSize(m_size);
    330     m_firstch.setSize(m_size);
    331     m_nextsib.setSize(m_size);
    332     m_prevsib.setSize(m_size);
    333     m_parent.setSize(m_size);
    334 
    335     m_elemIndexes=null;
    336 
    337     int ds= top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
    338     if (m_namespaceDeclSets!=null) {
    339       m_namespaceDeclSets.setSize(ds);
    340     }
    341 
    342     int ds1= top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
    343     if (m_namespaceDeclSetElements!=null) {
    344       m_namespaceDeclSetElements.setSize(ds1);
    345     }
    346 
    347     // Values from SAX2DTM - m_data always has a reserved entry
    348     m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
    349     m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
    350     m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());
    351 
    352     // Return true iff DTM now empty
    353     return m_size==0;
    354   }
    355 
    356   /** @return true if a DTM tree is currently under construction.
    357    * */
    358   public boolean isTreeIncomplete()
    359   {
    360     return !m_endDocumentOccured;
    361   }
    362 }
    363