Home | History | Annotate | Download | only in serializer
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $
     20  */
     21 package org.apache.xml.serializer;
     22 
     23 import java.io.IOException;
     24 
     25 import org.apache.xml.serializer.utils.MsgKey;
     26 import org.apache.xml.serializer.utils.Utils;
     27 import org.xml.sax.Attributes;
     28 import org.xml.sax.SAXException;
     29 
     30 /**
     31  * This class is not a public API.
     32  * It is only public because it is used in other packages.
     33  * This class converts SAX or SAX-like calls to a
     34  * serialized document for xsl:output method of "text".
     35  * @xsl.usage internal
     36  */
     37 public class ToTextStream extends ToStream
     38 {
     39 
     40 
     41   /**
     42    * Default constructor.
     43    */
     44   public ToTextStream()
     45   {
     46     super();
     47   }
     48 
     49 
     50 
     51   /**
     52    * Receive notification of the beginning of a document.
     53    *
     54    * <p>The SAX parser will invoke this method only once, before any
     55    * other methods in this interface or in DTDHandler (except for
     56    * setDocumentLocator).</p>
     57    *
     58    * @throws org.xml.sax.SAXException Any SAX exception, possibly
     59    *            wrapping another exception.
     60    *
     61    * @throws org.xml.sax.SAXException
     62    */
     63   protected void startDocumentInternal() throws org.xml.sax.SAXException
     64   {
     65     super.startDocumentInternal();
     66 
     67     m_needToCallStartDocument = false;
     68 
     69     // No action for the moment.
     70   }
     71 
     72   /**
     73    * Receive notification of the end of a document.
     74    *
     75    * <p>The SAX parser will invoke this method only once, and it will
     76    * be the last method invoked during the parse.  The parser shall
     77    * not invoke this method until it has either abandoned parsing
     78    * (because of an unrecoverable error) or reached the end of
     79    * input.</p>
     80    *
     81    * @throws org.xml.sax.SAXException Any SAX exception, possibly
     82    *            wrapping another exception.
     83    *
     84    * @throws org.xml.sax.SAXException
     85    */
     86   public void endDocument() throws org.xml.sax.SAXException
     87   {
     88     flushPending();
     89     flushWriter();
     90     if (m_tracer != null)
     91         super.fireEndDoc();
     92   }
     93 
     94   /**
     95    * Receive notification of the beginning of an element.
     96    *
     97    * <p>The Parser will invoke this method at the beginning of every
     98    * element in the XML document; there will be a corresponding
     99    * endElement() event for every startElement() event (even when the
    100    * element is empty). All of the element's content will be
    101    * reported, in order, before the corresponding endElement()
    102    * event.</p>
    103    *
    104    * <p>If the element name has a namespace prefix, the prefix will
    105    * still be attached.  Note that the attribute list provided will
    106    * contain only attributes with explicit values (specified or
    107    * defaulted): #IMPLIED attributes will be omitted.</p>
    108    *
    109    *
    110    * @param namespaceURI The Namespace URI, or the empty string if the
    111    *        element has no Namespace URI or if Namespace
    112    *        processing is not being performed.
    113    * @param localName The local name (without prefix), or the
    114    *        empty string if Namespace processing is not being
    115    *        performed.
    116    * @param name The qualified name (with prefix), or the
    117    *        empty string if qualified names are not available.
    118    * @param atts The attributes attached to the element, if any.
    119    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    120    *            wrapping another exception.
    121    * @see #endElement
    122    * @see org.xml.sax.AttributeList
    123    *
    124    * @throws org.xml.sax.SAXException
    125    */
    126   public void startElement(
    127           String namespaceURI, String localName, String name, Attributes atts)
    128             throws org.xml.sax.SAXException
    129   {
    130     // time to fire off startElement event
    131     if (m_tracer != null) {
    132         super.fireStartElem(name);
    133         this.firePseudoAttributes();
    134     }
    135     return;
    136   }
    137 
    138   /**
    139    * Receive notification of the end of an element.
    140    *
    141    * <p>The SAX parser will invoke this method at the end of every
    142    * element in the XML document; there will be a corresponding
    143    * startElement() event for every endElement() event (even when the
    144    * element is empty).</p>
    145    *
    146    * <p>If the element name has a namespace prefix, the prefix will
    147    * still be attached to the name.</p>
    148    *
    149    *
    150    * @param namespaceURI The Namespace URI, or the empty string if the
    151    *        element has no Namespace URI or if Namespace
    152    *        processing is not being performed.
    153    * @param localName The local name (without prefix), or the
    154    *        empty string if Namespace processing is not being
    155    *        performed.
    156    * @param name The qualified name (with prefix), or the
    157    *        empty string if qualified names are not available.
    158    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    159    *            wrapping another exception.
    160    *
    161    * @throws org.xml.sax.SAXException
    162    */
    163   public void endElement(String namespaceURI, String localName, String name)
    164           throws org.xml.sax.SAXException
    165   {
    166         if (m_tracer != null)
    167             super.fireEndElem(name);
    168   }
    169 
    170   /**
    171    * Receive notification of character data.
    172    *
    173    * <p>The Parser will call this method to report each chunk of
    174    * character data.  SAX parsers may return all contiguous character
    175    * data in a single chunk, or they may split it into several
    176    * chunks; however, all of the characters in any single event
    177    * must come from the same external entity, so that the Locator
    178    * provides useful information.</p>
    179    *
    180    * <p>The application must not attempt to read from the array
    181    * outside of the specified range.</p>
    182    *
    183    * <p>Note that some parsers will report whitespace using the
    184    * ignorableWhitespace() method rather than this one (validating
    185    * parsers must do so).</p>
    186    *
    187    * @param ch The characters from the XML document.
    188    * @param start The start position in the array.
    189    * @param length The number of characters to read from the array.
    190    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    191    *            wrapping another exception.
    192    * @see #ignorableWhitespace
    193    * @see org.xml.sax.Locator
    194    */
    195   public void characters(char ch[], int start, int length)
    196           throws org.xml.sax.SAXException
    197   {
    198 
    199     flushPending();
    200 
    201     try
    202     {
    203         if (inTemporaryOutputState()) {
    204             /* leave characters un-processed as we are
    205              * creating temporary output, the output generated by
    206              * this serializer will be input to a final serializer
    207              * later on and it will do the processing in final
    208              * output state (not temporary output state).
    209              *
    210              * A "temporary" ToTextStream serializer is used to
    211              * evaluate attribute value templates (for example),
    212              * and the result of evaluating such a thing
    213              * is fed into a final serializer later on.
    214              */
    215             m_writer.write(ch, start, length);
    216         }
    217         else {
    218             // In final output state we do process the characters!
    219             writeNormalizedChars(ch, start, length, m_lineSepUse);
    220         }
    221 
    222         if (m_tracer != null)
    223             super.fireCharEvent(ch, start, length);
    224     }
    225     catch(IOException ioe)
    226     {
    227       throw new SAXException(ioe);
    228     }
    229   }
    230 
    231   /**
    232    * If available, when the disable-output-escaping attribute is used,
    233    * output raw text without escaping.
    234    *
    235    * @param ch The characters from the XML document.
    236    * @param start The start position in the array.
    237    * @param length The number of characters to read from the array.
    238    *
    239    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    240    *            wrapping another exception.
    241    */
    242   public void charactersRaw(char ch[], int start, int length)
    243           throws org.xml.sax.SAXException
    244   {
    245 
    246     try
    247     {
    248       writeNormalizedChars(ch, start, length, m_lineSepUse);
    249     }
    250     catch(IOException ioe)
    251     {
    252       throw new SAXException(ioe);
    253     }
    254   }
    255 
    256     /**
    257      * Normalize the characters, but don't escape.  Different from
    258      * SerializerToXML#writeNormalizedChars because it does not attempt to do
    259      * XML escaping at all.
    260      *
    261      * @param ch The characters from the XML document.
    262      * @param start The start position in the array.
    263      * @param length The number of characters to read from the array.
    264      * @param useLineSep true if the operating systems
    265      * end-of-line separator should be output rather than a new-line character.
    266      *
    267      * @throws IOException
    268      * @throws org.xml.sax.SAXException
    269      */
    270     void writeNormalizedChars(
    271         final char ch[],
    272             final int start,
    273             final int length,
    274             final boolean useLineSep)
    275             throws IOException, org.xml.sax.SAXException
    276     {
    277         final String encoding = getEncoding();
    278         final java.io.Writer writer = m_writer;
    279         final int end = start + length;
    280 
    281         /* copy a few "constants" before the loop for performance */
    282         final char S_LINEFEED = CharInfo.S_LINEFEED;
    283 
    284         // This for() loop always increments i by one at the end
    285         // of the loop.  Additional increments of i adjust for when
    286         // two input characters (a high/low UTF16 surrogate pair)
    287         // are processed.
    288         for (int i = start; i < end; i++) {
    289             final char c = ch[i];
    290 
    291             if (S_LINEFEED == c && useLineSep) {
    292                 writer.write(m_lineSep, 0, m_lineSepLen);
    293                 // one input char processed
    294             } else if (m_encodingInfo.isInEncoding(c)) {
    295                 writer.write(c);
    296                 // one input char processed
    297             } else if (Encodings.isHighUTF16Surrogate(c)) {
    298                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
    299                 if (codePoint != 0) {
    300                     // I think we can just emit the message,
    301                     // not crash and burn.
    302                     final String integralValue = Integer.toString(codePoint);
    303                     final String msg = Utils.messages.createMessage(
    304                         MsgKey.ER_ILLEGAL_CHARACTER,
    305                         new Object[] { integralValue, encoding });
    306 
    307                     //Older behavior was to throw the message,
    308                     //but newer gentler behavior is to write a message to System.err
    309                     //throw new SAXException(msg);
    310                     System.err.println(msg);
    311 
    312                 }
    313                 i++; // two input chars processed
    314             } else {
    315                 // Don't know what to do with this char, it is
    316                 // not in the encoding and not a high char in
    317                 // a surrogate pair, so write out as an entity ref
    318                 if (encoding != null) {
    319                     /* The output encoding is known,
    320                      * so somthing is wrong.
    321                      */
    322 
    323                     // not in the encoding, so write out a character reference
    324                     writer.write('&');
    325                     writer.write('#');
    326                     writer.write(Integer.toString(c));
    327                     writer.write(';');
    328 
    329                     // I think we can just emit the message,
    330                     // not crash and burn.
    331                     final String integralValue = Integer.toString(c);
    332                     final String msg = Utils.messages.createMessage(
    333                         MsgKey.ER_ILLEGAL_CHARACTER,
    334                         new Object[] { integralValue, encoding });
    335 
    336                     //Older behavior was to throw the message,
    337                     //but newer gentler behavior is to write a message to System.err
    338                     //throw new SAXException(msg);
    339                     System.err.println(msg);
    340                 } else {
    341                     /* The output encoding is not known,
    342                      * so just write it out as-is.
    343                      */
    344                     writer.write(c);
    345                 }
    346 
    347                 // one input char was processed
    348             }
    349         }
    350     }
    351 
    352   /**
    353    * Receive notification of cdata.
    354    *
    355    * <p>The Parser will call this method to report each chunk of
    356    * character data.  SAX parsers may return all contiguous character
    357    * data in a single chunk, or they may split it into several
    358    * chunks; however, all of the characters in any single event
    359    * must come from the same external entity, so that the Locator
    360    * provides useful information.</p>
    361    *
    362    * <p>The application must not attempt to read from the array
    363    * outside of the specified range.</p>
    364    *
    365    * <p>Note that some parsers will report whitespace using the
    366    * ignorableWhitespace() method rather than this one (validating
    367    * parsers must do so).</p>
    368    *
    369    * @param ch The characters from the XML document.
    370    * @param start The start position in the array.
    371    * @param length The number of characters to read from the array.
    372    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    373    *            wrapping another exception.
    374    * @see #ignorableWhitespace
    375    * @see org.xml.sax.Locator
    376    */
    377   public void cdata(char ch[], int start, int length)
    378           throws org.xml.sax.SAXException
    379   {
    380     try
    381     {
    382         writeNormalizedChars(ch, start, length, m_lineSepUse);
    383         if (m_tracer != null)
    384             super.fireCDATAEvent(ch, start, length);
    385     }
    386     catch(IOException ioe)
    387     {
    388       throw new SAXException(ioe);
    389     }
    390   }
    391 
    392   /**
    393    * Receive notification of ignorable whitespace in element content.
    394    *
    395    * <p>Validating Parsers must use this method to report each chunk
    396    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
    397    * section 2.10): non-validating parsers may also use this method
    398    * if they are capable of parsing and using content models.</p>
    399    *
    400    * <p>SAX parsers may return all contiguous whitespace in a single
    401    * chunk, or they may split it into several chunks; however, all of
    402    * the characters in any single event must come from the same
    403    * external entity, so that the Locator provides useful
    404    * information.</p>
    405    *
    406    * <p>The application must not attempt to read from the array
    407    * outside of the specified range.</p>
    408    *
    409    * @param ch The characters from the XML document.
    410    * @param start The start position in the array.
    411    * @param length The number of characters to read from the array.
    412    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    413    *            wrapping another exception.
    414    * @see #characters
    415    *
    416    * @throws org.xml.sax.SAXException
    417    */
    418   public void ignorableWhitespace(char ch[], int start, int length)
    419           throws org.xml.sax.SAXException
    420   {
    421 
    422     try
    423     {
    424       writeNormalizedChars(ch, start, length, m_lineSepUse);
    425     }
    426     catch(IOException ioe)
    427     {
    428       throw new SAXException(ioe);
    429     }
    430   }
    431 
    432   /**
    433    * Receive notification of a processing instruction.
    434    *
    435    * <p>The Parser will invoke this method once for each processing
    436    * instruction found: note that processing instructions may occur
    437    * before or after the main document element.</p>
    438    *
    439    * <p>A SAX parser should never report an XML declaration (XML 1.0,
    440    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
    441    * using this method.</p>
    442    *
    443    * @param target The processing instruction target.
    444    * @param data The processing instruction data, or null if
    445    *        none was supplied.
    446    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    447    *            wrapping another exception.
    448    *
    449    * @throws org.xml.sax.SAXException
    450    */
    451   public void processingInstruction(String target, String data)
    452           throws org.xml.sax.SAXException
    453   {
    454     // flush anything pending first
    455     flushPending();
    456 
    457     if (m_tracer != null)
    458         super.fireEscapingEvent(target, data);
    459   }
    460 
    461   /**
    462    * Called when a Comment is to be constructed.
    463    * Note that Xalan will normally invoke the other version of this method.
    464    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
    465    *
    466    * @param   data  The comment data.
    467    * @throws org.xml.sax.SAXException Any SAX exception, possibly
    468    *            wrapping another exception.
    469    */
    470   public void comment(String data) throws org.xml.sax.SAXException
    471   {
    472       final int length = data.length();
    473       if (length > m_charsBuff.length)
    474       {
    475           m_charsBuff = new char[length*2 + 1];
    476       }
    477       data.getChars(0, length, m_charsBuff, 0);
    478       comment(m_charsBuff, 0, length);
    479   }
    480 
    481   /**
    482    * Report an XML comment anywhere in the document.
    483    *
    484    * This callback will be used for comments inside or outside the
    485    * document element, including comments in the external DTD
    486    * subset (if read).
    487    *
    488    * @param ch An array holding the characters in the comment.
    489    * @param start The starting position in the array.
    490    * @param length The number of characters to use from the array.
    491    * @throws org.xml.sax.SAXException The application may raise an exception.
    492    */
    493   public void comment(char ch[], int start, int length)
    494           throws org.xml.sax.SAXException
    495   {
    496 
    497     flushPending();
    498     if (m_tracer != null)
    499         super.fireCommentEvent(ch, start, length);
    500   }
    501 
    502   /**
    503    * Receive notivication of a entityReference.
    504    *
    505    * @param name non-null reference to the name of the entity.
    506    *
    507    * @throws org.xml.sax.SAXException
    508    */
    509   public void entityReference(String name) throws org.xml.sax.SAXException
    510   {
    511         if (m_tracer != null)
    512             super.fireEntityReference(name);
    513   }
    514 
    /**
     * Ignores the attribute: this method has an empty body, so the
     * attribute is neither stored nor written.
     *
     * @param uri namespace URI of the attribute; ignored.
     * @param localName local name of the attribute; ignored.
     * @param rawName qualified name of the attribute; ignored.
     * @param type attribute type; ignored.
     * @param value attribute value; ignored.
     * @param XSLAttribute ignored.
     * @see ExtendedContentHandler#addAttribute(String, String, String, String, String, boolean)
     */
    public void addAttribute(
        String uri,
        String localName,
        String rawName,
        String type,
        String value,
        boolean XSLAttribute)
    {
        // do nothing, just forget all about the attribute
    }
    528 
    /**
     * Does nothing: this method has an empty body.
     *
     * @throws SAXException declared by the interface; never thrown here.
     * @see org.xml.sax.ext.LexicalHandler#endCDATA()
     */
    public void endCDATA() throws SAXException
    {
        // do nothing
    }
    536 
    537     /**
    538      * @see ExtendedContentHandler#endElement(String)
    539      */
    540     public void endElement(String elemName) throws SAXException
    541     {
    542         if (m_tracer != null)
    543             super.fireEndElem(elemName);
    544     }
    545 
    546     /**
    547      * From XSLTC
    548      */
    549     public void startElement(
    550     String elementNamespaceURI,
    551     String elementLocalName,
    552     String elementName)
    553     throws SAXException
    554     {
    555         if (m_needToCallStartDocument)
    556             startDocumentInternal();
    557         // time to fire off startlement event.
    558         if (m_tracer != null) {
    559             super.fireStartElem(elementName);
    560             this.firePseudoAttributes();
    561         }
    562 
    563         return;
    564     }
    565 
    566 
    567     /**
    568      * From XSLTC
    569      */
    570     public void characters(String characters)
    571     throws SAXException
    572     {
    573         final int length = characters.length();
    574         if (length > m_charsBuff.length)
    575         {
    576             m_charsBuff = new char[length*2 + 1];
    577         }
    578         characters.getChars(0, length, m_charsBuff, 0);
    579         characters(m_charsBuff, 0, length);
    580     }
    581 
    582 
    /**
     * From XSLTC. Ignores the attribute: this method has an empty body,
     * so the attribute is neither stored nor written.
     *
     * @param name name of the attribute; ignored.
     * @param value value of the attribute; ignored.
     */
    public void addAttribute(String name, String value)
    {
        // do nothing, forget about the attribute
    }
    590 
    /**
     * Add a unique attribute. This implementation has an empty body, so
     * the attribute is neither stored nor written.
     *
     * @param qName qualified name of the attribute; ignored.
     * @param value value of the attribute; ignored.
     * @param flags ignored.
     * @throws SAXException declared by the signature; never thrown here.
     */
    public void addUniqueAttribute(String qName, String value, int flags)
        throws SAXException
    {
        // do nothing, forget about the attribute
    }
    599 
    /**
     * Ignores the namespace prefix mapping.
     *
     * @param prefix the namespace prefix; ignored.
     * @param uri the namespace URI; ignored.
     * @param shouldFlush ignored.
     * @return always false.
     * @throws SAXException declared by the signature; never thrown here.
     */
    public boolean startPrefixMapping(
        String prefix,
        String uri,
        boolean shouldFlush)
        throws SAXException
    {
        // no namespace support in this text serializer
        return false;
    }
    609 
    610 
    /**
     * Ignores the namespace prefix mapping: this method has an empty body.
     *
     * @param prefix the namespace prefix; ignored.
     * @param uri the namespace URI; ignored.
     * @throws org.xml.sax.SAXException declared by the signature; never
     *            thrown here.
     */
    public void startPrefixMapping(String prefix, String uri)
        throws org.xml.sax.SAXException
    {
        // no namespace support in this text serializer
    }
    616 
    617 
    /**
     * Ignores the namespace declaration: this method has an empty body.
     *
     * @param prefix the namespace prefix; ignored.
     * @param uri the namespace URI; ignored.
     * @throws SAXException declared by the signature; never thrown here.
     */
    public void namespaceAfterStartElement(
        final String prefix,
        final String uri)
        throws SAXException
    {
        // no namespace support in this text serializer
    }
    625 
    626     public void flushPending() throws org.xml.sax.SAXException
    627     {
    628             if (m_needToCallStartDocument)
    629             {
    630                 startDocumentInternal();
    631                 m_needToCallStartDocument = false;
    632             }
    633     }
    634 }
    635