Home | History | Annotate | Download | only in utils
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: TreeWalker.java 468655 2006-10-28 07:12:06Z minchau $
     20  */
     21 package org.apache.xml.utils;
     22 
     23 import java.io.File;
     24 
     25 import org.w3c.dom.Comment;
     26 import org.w3c.dom.Element;
     27 import org.w3c.dom.EntityReference;
     28 import org.w3c.dom.NamedNodeMap;
     29 import org.w3c.dom.Node;
     30 import org.w3c.dom.ProcessingInstruction;
     31 import org.w3c.dom.Text;
     32 
     33 import org.xml.sax.ContentHandler;
     34 import org.xml.sax.Locator;
     35 import org.xml.sax.ext.LexicalHandler;
     36 import org.xml.sax.helpers.LocatorImpl;
     37 
     38 /**
     39  * This class does a pre-order walk of the DOM tree, calling a ContentHandler
     40  * interface as it goes.
     41  * @xsl.usage advanced
     42  */
     43 
     44 public class TreeWalker
     45 {
     46 
     47   /** Local reference to a ContentHandler          */
     48   private ContentHandler m_contentHandler = null;
     49 
     50   // ARGHH!!  JAXP Uses Xerces without setting the namespace processing to ON!
     51   // DOM2Helper m_dh = new DOM2Helper();
     52 
     53   /** DomHelper for this TreeWalker          */
     54   protected DOMHelper m_dh;
     55 
     56         /** Locator object for this TreeWalker          */
     57         private LocatorImpl m_locator = new LocatorImpl();
     58 
     59   /**
     60    * Get the ContentHandler used for the tree walk.
     61    *
     62    * @return the ContentHandler used for the tree walk
     63    */
     64   public ContentHandler getContentHandler()
     65   {
     66     return m_contentHandler;
     67   }
     68 
     69   /**
     70    * Get the ContentHandler used for the tree walk.
     71    *
     72    * @return the ContentHandler used for the tree walk
     73    */
     74   public void setContentHandler(ContentHandler ch)
     75   {
     76     m_contentHandler = ch;
     77   }
     78 
     /**
      * Constructor.
      *
      * @param contentHandler the implementation of the contentHandler operation
      *                       (toXMLString, digest, ...); may be null, in which
      *                       case a handler has to be supplied through
      *                       setContentHandler before any traversal is started
      * @param dh             the DOMHelper used to look up the namespace and the
      *                       local name of the nodes that are visited
      * @param systemId       system identifier for the document; when null, the
      *                       placeholder built from the "user.dir" system
      *                       property and the file name "dummy.xsl" is used
      */
     public TreeWalker(ContentHandler contentHandler, DOMHelper dh, String systemId)
     {
       this.m_contentHandler = contentHandler;

       // Same guard as the single-argument constructor: a null handler is
       // tolerated here and may be provided later.
       if (contentHandler != null)
       {
         contentHandler.setDocumentLocator(m_locator);
       }

       if (systemId != null)
       {
         m_locator.setSystemId(systemId);
       }
       else
       {
         try
         {
           // Bug see Bugzilla  26741
           m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl");
         }
         catch (SecurityException se)
         {
           // user.dir not accessible from applet; the locator keeps no system id
         }
       }

       m_dh = dh;
     }
    101 
    /**
     * Constructor. The locator's system id is set to the placeholder built from
     * the "user.dir" system property and the file name "dummy.xsl".
     *
     * @param contentHandler the implementation of the contentHandler operation
     *                       (toXMLString, digest, ...); may be null, in which
     *                       case a handler has to be supplied through
     *                       setContentHandler before any traversal is started
     * @param dh             the DOMHelper used to look up the namespace and the
     *                       local name of the nodes that are visited
     */
    public TreeWalker(ContentHandler contentHandler, DOMHelper dh)
    {
      this.m_contentHandler = contentHandler;

      // Same guard as the single-argument constructor: a null handler is
      // tolerated here and may be provided later.
      if (contentHandler != null)
      {
        contentHandler.setDocumentLocator(m_locator);
      }

      try
      {
        // Bug see Bugzilla  26741
        m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl");
      }
      catch (SecurityException se)
      {
        // user.dir not accessible from applet; the locator keeps no system id
      }

      m_dh = dh;
    }
    119 
    120   /**
    121    * Constructor.
    122    * @param   contentHandler The implemention of the
    123    * contentHandler operation (toXMLString, digest, ...)
    124    */
    125   public TreeWalker(ContentHandler contentHandler)
    126   {
    127     this.m_contentHandler = contentHandler;
    128                 if (m_contentHandler != null)
    129                         m_contentHandler.setDocumentLocator(m_locator);
    130                 try {
    131                    // Bug see Bugzilla  26741
    132                   m_locator.setSystemId(System.getProperty("user.dir") + File.separator + "dummy.xsl");
    133                 }
    134                 catch (SecurityException se){// user.dir not accessible from applet
    135 
    136     }
    137     m_dh = new DOM2Helper();
    138   }
    139 
    140   /**
    141    * Perform a pre-order traversal non-recursive style.
    142    *
    143    * Note that TreeWalker assumes that the subtree is intended to represent
    144    * a complete (though not necessarily well-formed) document and, during a
    145    * traversal, startDocument and endDocument will always be issued to the
    146    * SAX listener.
    147    *
    148    * @param pos Node in the tree where to start traversal
    149    *
    150    * @throws TransformerException
    151    */
    152   public void traverse(Node pos) throws org.xml.sax.SAXException
    153   {
    154         this.m_contentHandler.startDocument();
    155 
    156         traverseFragment(pos);
    157 
    158         this.m_contentHandler.endDocument();
    159   }
    160 
    161   /**
    162    * Perform a pre-order traversal non-recursive style.
    163    *
    164    * In contrast to the traverse() method this method will not issue
    165    * startDocument() and endDocument() events to the SAX listener.
    166    *
    167    * @param pos Node in the tree where to start traversal
    168    *
    169    * @throws TransformerException
    170    */
    171   public void traverseFragment(Node pos) throws org.xml.sax.SAXException
    172   {
    173     Node top = pos;
    174 
    175     while (null != pos)
    176     {
    177       startNode(pos);
    178 
    179       Node nextNode = pos.getFirstChild();
    180 
    181       while (null == nextNode)
    182       {
    183         endNode(pos);
    184 
    185         if (top.equals(pos))
    186           break;
    187 
    188         nextNode = pos.getNextSibling();
    189 
    190         if (null == nextNode)
    191         {
    192           pos = pos.getParentNode();
    193 
    194           if ((null == pos) || (top.equals(pos)))
    195           {
    196             if (null != pos)
    197               endNode(pos);
    198 
    199             nextNode = null;
    200 
    201             break;
    202           }
    203         }
    204       }
    205 
    206       pos = nextNode;
    207     }
    208   }
    209 
    210   /**
    211    * Perform a pre-order traversal non-recursive style.
    212 
    213    * Note that TreeWalker assumes that the subtree is intended to represent
    214    * a complete (though not necessarily well-formed) document and, during a
    215    * traversal, startDocument and endDocument will always be issued to the
    216    * SAX listener.
    217    *
    218    * @param pos Node in the tree where to start traversal
    219    * @param top Node in the tree where to end traversal
    220    *
    221    * @throws TransformerException
    222    */
    223   public void traverse(Node pos, Node top) throws org.xml.sax.SAXException
    224   {
    225 
    226 	this.m_contentHandler.startDocument();
    227 
    228     while (null != pos)
    229     {
    230       startNode(pos);
    231 
    232       Node nextNode = pos.getFirstChild();
    233 
    234       while (null == nextNode)
    235       {
    236         endNode(pos);
    237 
    238         if ((null != top) && top.equals(pos))
    239           break;
    240 
    241         nextNode = pos.getNextSibling();
    242 
    243         if (null == nextNode)
    244         {
    245           pos = pos.getParentNode();
    246 
    247           if ((null == pos) || ((null != top) && top.equals(pos)))
    248           {
    249             nextNode = null;
    250 
    251             break;
    252           }
    253         }
    254       }
    255 
    256       pos = nextNode;
    257     }
    258     this.m_contentHandler.endDocument();
    259   }
    260 
    261   /** Flag indicating whether following text to be processed is raw text          */
    262   boolean nextIsRaw = false;
    263 
    264   /**
    265    * Optimized dispatch of characters.
    266    */
    267   private final void dispatachChars(Node node)
    268      throws org.xml.sax.SAXException
    269   {
    270     if(m_contentHandler instanceof org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)
    271     {
    272       ((org.apache.xml.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)m_contentHandler).characters(node);
    273     }
    274     else
    275     {
    276       String data = ((Text) node).getData();
    277       this.m_contentHandler.characters(data.toCharArray(), 0, data.length());
    278     }
    279   }
    280 
    281   /**
    282    * Start processing given node
    283    *
    284    *
    285    * @param node Node to process
    286    *
    287    * @throws org.xml.sax.SAXException
    288    */
    289   protected void startNode(Node node) throws org.xml.sax.SAXException
    290   {
    291 
    292     if (m_contentHandler instanceof NodeConsumer)
    293     {
    294       ((NodeConsumer) m_contentHandler).setOriginatingNode(node);
    295     }
    296 
    297                 if (node instanceof Locator)
    298                 {
    299                         Locator loc = (Locator)node;
    300                         m_locator.setColumnNumber(loc.getColumnNumber());
    301                         m_locator.setLineNumber(loc.getLineNumber());
    302                         m_locator.setPublicId(loc.getPublicId());
    303                         m_locator.setSystemId(loc.getSystemId());
    304                 }
    305                 else
    306                 {
    307                         m_locator.setColumnNumber(0);
    308       m_locator.setLineNumber(0);
    309                 }
    310 
    311     switch (node.getNodeType())
    312     {
    313     case Node.COMMENT_NODE :
    314     {
    315       String data = ((Comment) node).getData();
    316 
    317       if (m_contentHandler instanceof LexicalHandler)
    318       {
    319         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
    320 
    321         lh.comment(data.toCharArray(), 0, data.length());
    322       }
    323     }
    324     break;
    325     case Node.DOCUMENT_FRAGMENT_NODE :
    326 
    327       // ??;
    328       break;
    329     case Node.DOCUMENT_NODE :
    330 
    331       break;
    332     case Node.ELEMENT_NODE :
    333       NamedNodeMap atts = ((Element) node).getAttributes();
    334       int nAttrs = atts.getLength();
    335       // System.out.println("TreeWalker#startNode: "+node.getNodeName());
    336 
    337       for (int i = 0; i < nAttrs; i++)
    338       {
    339         Node attr = atts.item(i);
    340         String attrName = attr.getNodeName();
    341 
    342         // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue());
    343         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
    344         {
    345           // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue());
    346           int index;
    347           // Use "" instead of null, as Xerces likes "" for the
    348           // name of the default namespace.  Fix attributed
    349           // to "Steven Murray" <smurray (at) ebt.com>.
    350           String prefix = (index = attrName.indexOf(":")) < 0
    351                           ? "" : attrName.substring(index + 1);
    352 
    353           this.m_contentHandler.startPrefixMapping(prefix,
    354                                                    attr.getNodeValue());
    355         }
    356 
    357       }
    358 
    359       // System.out.println("m_dh.getNamespaceOfNode(node): "+m_dh.getNamespaceOfNode(node));
    360       // System.out.println("m_dh.getLocalNameOfNode(node): "+m_dh.getLocalNameOfNode(node));
    361       String ns = m_dh.getNamespaceOfNode(node);
    362       if(null == ns)
    363         ns = "";
    364       this.m_contentHandler.startElement(ns,
    365                                          m_dh.getLocalNameOfNode(node),
    366                                          node.getNodeName(),
    367                                          new AttList(atts, m_dh));
    368       break;
    369     case Node.PROCESSING_INSTRUCTION_NODE :
    370     {
    371       ProcessingInstruction pi = (ProcessingInstruction) node;
    372       String name = pi.getNodeName();
    373 
    374       // String data = pi.getData();
    375       if (name.equals("xslt-next-is-raw"))
    376       {
    377         nextIsRaw = true;
    378       }
    379       else
    380       {
    381         this.m_contentHandler.processingInstruction(pi.getNodeName(),
    382                                                     pi.getData());
    383       }
    384     }
    385     break;
    386     case Node.CDATA_SECTION_NODE :
    387     {
    388       boolean isLexH = (m_contentHandler instanceof LexicalHandler);
    389       LexicalHandler lh = isLexH
    390                           ? ((LexicalHandler) this.m_contentHandler) : null;
    391 
    392       if (isLexH)
    393       {
    394         lh.startCDATA();
    395       }
    396 
    397       dispatachChars(node);
    398 
    399       {
    400         if (isLexH)
    401         {
    402           lh.endCDATA();
    403         }
    404       }
    405     }
    406     break;
    407     case Node.TEXT_NODE :
    408     {
    409       //String data = ((Text) node).getData();
    410 
    411       if (nextIsRaw)
    412       {
    413         nextIsRaw = false;
    414 
    415         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, "");
    416         dispatachChars(node);
    417         m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, "");
    418       }
    419       else
    420       {
    421         dispatachChars(node);
    422       }
    423     }
    424     break;
    425     case Node.ENTITY_REFERENCE_NODE :
    426     {
    427       EntityReference eref = (EntityReference) node;
    428 
    429       if (m_contentHandler instanceof LexicalHandler)
    430       {
    431         ((LexicalHandler) this.m_contentHandler).startEntity(
    432           eref.getNodeName());
    433       }
    434       else
    435       {
    436 
    437         // warning("Can not output entity to a pure SAX ContentHandler");
    438       }
    439     }
    440     break;
    441     default :
    442     }
    443   }
    444 
    445   /**
    446    * End processing of given node
    447    *
    448    *
    449    * @param node Node we just finished processing
    450    *
    451    * @throws org.xml.sax.SAXException
    452    */
    453   protected void endNode(Node node) throws org.xml.sax.SAXException
    454   {
    455 
    456     switch (node.getNodeType())
    457     {
    458     case Node.DOCUMENT_NODE :
    459       break;
    460 
    461     case Node.ELEMENT_NODE :
    462       String ns = m_dh.getNamespaceOfNode(node);
    463       if(null == ns)
    464         ns = "";
    465       this.m_contentHandler.endElement(ns,
    466                                          m_dh.getLocalNameOfNode(node),
    467                                          node.getNodeName());
    468 
    469       NamedNodeMap atts = ((Element) node).getAttributes();
    470       int nAttrs = atts.getLength();
    471 
    472       for (int i = 0; i < nAttrs; i++)
    473       {
    474         Node attr = atts.item(i);
    475         String attrName = attr.getNodeName();
    476 
    477         if (attrName.equals("xmlns") || attrName.startsWith("xmlns:"))
    478         {
    479           int index;
    480           // Use "" instead of null, as Xerces likes "" for the
    481           // name of the default namespace.  Fix attributed
    482           // to "Steven Murray" <smurray (at) ebt.com>.
    483           String prefix = (index = attrName.indexOf(":")) < 0
    484                           ? "" : attrName.substring(index + 1);
    485 
    486           this.m_contentHandler.endPrefixMapping(prefix);
    487         }
    488       }
    489       break;
    490     case Node.CDATA_SECTION_NODE :
    491       break;
    492     case Node.ENTITY_REFERENCE_NODE :
    493     {
    494       EntityReference eref = (EntityReference) node;
    495 
    496       if (m_contentHandler instanceof LexicalHandler)
    497       {
    498         LexicalHandler lh = ((LexicalHandler) this.m_contentHandler);
    499 
    500         lh.endEntity(eref.getNodeName());
    501       }
    502     }
    503     break;
    504     default :
    505     }
    506   }
    507 }  //TreeWalker
    508 
    509