Home | History | Annotate | Download | only in serializer
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: ToHTMLStream.java 468654 2006-10-28 07:09:23Z minchau $
     20  */
     21 package org.apache.xml.serializer;
     22 
     23 import java.io.IOException;
     24 import java.util.Properties;
     25 
     26 import javax.xml.transform.Result;
     27 
     28 import org.apache.xml.serializer.utils.MsgKey;
     29 import org.apache.xml.serializer.utils.Utils;
     30 import org.xml.sax.Attributes;
     31 import org.xml.sax.SAXException;
     32 
     33 /**
     34  * This serializer takes a series of SAX or
     35  * SAX-like events and writes its output
     36  * to the given stream.
     37  *
     38  * This class is not a public API, it is public
     39  * because it is used from another package.
     40  *
     41  * @xsl.usage internal
     42  */
     43 public class ToHTMLStream extends ToStream
     44 {
     45 
     46     /** This flag is set while receiving events from the DTD */
     47     protected boolean m_inDTD = false;
     48 
     49     /** True if the current element is a block element.  (seems like
     50      *  this needs to be a stack. -sb). */
     51     private boolean m_inBlockElem = false;
     52 
     53     /**
     54      * Map that tells which XML characters should have special treatment, and it
     55      *  provides character to entity name lookup.
     56      */
     57     private final CharInfo m_htmlcharInfo =
     58 //        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
     59         CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
     60 
     61     /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
     62     static final Trie m_elementFlags = new Trie();
     63 
     64     static {
     65         initTagReference(m_elementFlags);
     66     }
     67     static void initTagReference(Trie m_elementFlags) {
     68 
     69         // HTML 4.0 loose DTD
     70         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
     71         m_elementFlags.put(
     72             "FRAME",
     73             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
     74         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
     75         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
     76         m_elementFlags.put(
     77             "ISINDEX",
     78             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
     79         m_elementFlags.put(
     80             "APPLET",
     81             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
     82         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
     83         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
     84         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
     85 
     86         // HTML 4.0 strict DTD
     87         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
     88         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
     89         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
     90         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
     91         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
     92         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
     93         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
     94         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
     95         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
     96         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
     97         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
     98         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
     99         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
    100         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
    101         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
    102         m_elementFlags.put(
    103             "SUP",
    104             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    105         m_elementFlags.put(
    106             "SUB",
    107             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    108         m_elementFlags.put(
    109             "SPAN",
    110             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    111         m_elementFlags.put(
    112             "BDO",
    113             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    114         m_elementFlags.put(
    115             "BR",
    116             new ElemDesc(
    117                 0
    118                     | ElemDesc.SPECIAL
    119                     | ElemDesc.ASPECIAL
    120                     | ElemDesc.EMPTY
    121                     | ElemDesc.BLOCK));
    122         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
    123         m_elementFlags.put(
    124             "ADDRESS",
    125             new ElemDesc(
    126                 0
    127                     | ElemDesc.BLOCK
    128                     | ElemDesc.BLOCKFORM
    129                     | ElemDesc.BLOCKFORMFIELDSET));
    130         m_elementFlags.put(
    131             "DIV",
    132             new ElemDesc(
    133                 0
    134                     | ElemDesc.BLOCK
    135                     | ElemDesc.BLOCKFORM
    136                     | ElemDesc.BLOCKFORMFIELDSET));
    137         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
    138         m_elementFlags.put(
    139             "MAP",
    140             new ElemDesc(
    141                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
    142         m_elementFlags.put(
    143             "AREA",
    144             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
    145         m_elementFlags.put(
    146             "LINK",
    147             new ElemDesc(
    148                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
    149         m_elementFlags.put(
    150             "IMG",
    151             new ElemDesc(
    152                 0
    153                     | ElemDesc.SPECIAL
    154                     | ElemDesc.ASPECIAL
    155                     | ElemDesc.EMPTY
    156                     | ElemDesc.WHITESPACESENSITIVE));
    157         m_elementFlags.put(
    158             "OBJECT",
    159             new ElemDesc(
    160                 0
    161                     | ElemDesc.SPECIAL
    162                     | ElemDesc.ASPECIAL
    163                     | ElemDesc.HEADMISC
    164                     | ElemDesc.WHITESPACESENSITIVE));
    165         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
    166         m_elementFlags.put(
    167             "HR",
    168             new ElemDesc(
    169                 0
    170                     | ElemDesc.BLOCK
    171                     | ElemDesc.BLOCKFORM
    172                     | ElemDesc.BLOCKFORMFIELDSET
    173                     | ElemDesc.EMPTY));
    174         m_elementFlags.put(
    175             "P",
    176             new ElemDesc(
    177                 0
    178                     | ElemDesc.BLOCK
    179                     | ElemDesc.BLOCKFORM
    180                     | ElemDesc.BLOCKFORMFIELDSET));
    181         m_elementFlags.put(
    182             "H1",
    183             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    184         m_elementFlags.put(
    185             "H2",
    186             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    187         m_elementFlags.put(
    188             "H3",
    189             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    190         m_elementFlags.put(
    191             "H4",
    192             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    193         m_elementFlags.put(
    194             "H5",
    195             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    196         m_elementFlags.put(
    197             "H6",
    198             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
    199         m_elementFlags.put(
    200             "PRE",
    201             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
    202         m_elementFlags.put(
    203             "Q",
    204             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
    205         m_elementFlags.put(
    206             "BLOCKQUOTE",
    207             new ElemDesc(
    208                 0
    209                     | ElemDesc.BLOCK
    210                     | ElemDesc.BLOCKFORM
    211                     | ElemDesc.BLOCKFORMFIELDSET));
    212         m_elementFlags.put("INS", new ElemDesc(0));
    213         m_elementFlags.put("DEL", new ElemDesc(0));
    214         m_elementFlags.put(
    215             "DL",
    216             new ElemDesc(
    217                 0
    218                     | ElemDesc.BLOCK
    219                     | ElemDesc.BLOCKFORM
    220                     | ElemDesc.BLOCKFORMFIELDSET));
    221         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
    222         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
    223         m_elementFlags.put(
    224             "OL",
    225             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
    226         m_elementFlags.put(
    227             "UL",
    228             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
    229         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
    230         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
    231         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
    232         m_elementFlags.put(
    233             "INPUT",
    234             new ElemDesc(
    235                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
    236         m_elementFlags.put(
    237             "SELECT",
    238             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    239         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
    240         m_elementFlags.put("OPTION", new ElemDesc(0));
    241         m_elementFlags.put(
    242             "TEXTAREA",
    243             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    244         m_elementFlags.put(
    245             "FIELDSET",
    246             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
    247         m_elementFlags.put("LEGEND", new ElemDesc(0));
    248         m_elementFlags.put(
    249             "BUTTON",
    250             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
    251         m_elementFlags.put(
    252             "TABLE",
    253             new ElemDesc(
    254                 0
    255                     | ElemDesc.BLOCK
    256                     | ElemDesc.BLOCKFORM
    257                     | ElemDesc.BLOCKFORMFIELDSET));
    258         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
    259         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
    260         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
    261         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
    262         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
    263         m_elementFlags.put(
    264             "COL",
    265             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
    266         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
    267         m_elementFlags.put("TH", new ElemDesc(0));
    268         m_elementFlags.put("TD", new ElemDesc(0));
    269         m_elementFlags.put(
    270             "HEAD",
    271             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
    272         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
    273         m_elementFlags.put(
    274             "BASE",
    275             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
    276         m_elementFlags.put(
    277             "META",
    278             new ElemDesc(
    279                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
    280         m_elementFlags.put(
    281             "STYLE",
    282             new ElemDesc(
    283                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
    284         m_elementFlags.put(
    285             "SCRIPT",
    286             new ElemDesc(
    287                 0
    288                     | ElemDesc.SPECIAL
    289                     | ElemDesc.ASPECIAL
    290                     | ElemDesc.HEADMISC
    291                     | ElemDesc.RAW));
    292         m_elementFlags.put(
    293             "NOSCRIPT",
    294             new ElemDesc(
    295                 0
    296                     | ElemDesc.BLOCK
    297                     | ElemDesc.BLOCKFORM
    298                     | ElemDesc.BLOCKFORMFIELDSET));
    299         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HTMLELEM));
    300 
    301         // From "John Ky" <hand (at) syd.speednet.com.au
    302         // Transitional Document Type Definition ()
    303         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
    304         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
    305 
    306         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
    307         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
    308         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
    309 
    310         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
    311         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
    312 
    313         // From "John Ky" <hand (at) syd.speednet.com.au
    314         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
    315 
    316         // HTML 4.0, section 16.5
    317         m_elementFlags.put(
    318             "IFRAME",
    319             new ElemDesc(
    320                 0
    321                     | ElemDesc.BLOCK
    322                     | ElemDesc.BLOCKFORM
    323                     | ElemDesc.BLOCKFORMFIELDSET));
    324 
    325         // Netscape 4 extension
    326         m_elementFlags.put(
    327             "LAYER",
    328             new ElemDesc(
    329                 0
    330                     | ElemDesc.BLOCK
    331                     | ElemDesc.BLOCKFORM
    332                     | ElemDesc.BLOCKFORMFIELDSET));
    333         // Netscape 4 extension
    334         m_elementFlags.put(
    335             "ILAYER",
    336             new ElemDesc(
    337                 0
    338                     | ElemDesc.BLOCK
    339                     | ElemDesc.BLOCKFORM
    340                     | ElemDesc.BLOCKFORMFIELDSET));
    341 
    342         // NOW FOR ATTRIBUTE INFORMATION . . .
    343         ElemDesc elemDesc;
    344 
    345 
    346         // ----------------------------------------------
    347         elemDesc = (ElemDesc) m_elementFlags.get("a");
    348         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
    349         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
    350 
    351         // ----------------------------------------------
    352         elemDesc = (ElemDesc) m_elementFlags.get("area");
    353 
    354         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
    355         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
    356 
    357         // ----------------------------------------------
    358         elemDesc = (ElemDesc) m_elementFlags.get("base");
    359 
    360         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
    361 
    362         // ----------------------------------------------
    363         elemDesc = (ElemDesc) m_elementFlags.get("button");
    364         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    365 
    366         // ----------------------------------------------
    367         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
    368 
    369         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
    370 
    371         // ----------------------------------------------
    372         elemDesc = (ElemDesc) m_elementFlags.get("del");
    373         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
    374 
    375         // ----------------------------------------------
    376         elemDesc = (ElemDesc) m_elementFlags.get("dir");
    377         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    378 
    379         // ----------------------------------------------
    380 
    381         elemDesc = (ElemDesc) m_elementFlags.get("div");
    382         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
    383         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
    384 
    385         // ----------------------------------------------
    386         elemDesc = (ElemDesc) m_elementFlags.get("dl");
    387         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    388 
    389         // ----------------------------------------------
    390         elemDesc = (ElemDesc) m_elementFlags.get("form");
    391         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
    392 
    393         // ----------------------------------------------
    394         // Attribution to: "Voytenko, Dimitry" <DVoytenko (at) SECTORBASE.COM>
    395         elemDesc = (ElemDesc) m_elementFlags.get("frame");
    396         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    397         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
    398         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
    399 
    400         // ----------------------------------------------
    401         elemDesc = (ElemDesc) m_elementFlags.get("head");
    402         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
    403 
    404         // ----------------------------------------------
    405         elemDesc = (ElemDesc) m_elementFlags.get("hr");
    406         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
    407 
    408         // ----------------------------------------------
    409         // HTML 4.0, section 16.5
    410         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
    411         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    412         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
    413 
    414         // ----------------------------------------------
    415         // Netscape 4 extension
    416         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
    417         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    418 
    419         // ----------------------------------------------
    420         elemDesc = (ElemDesc) m_elementFlags.get("img");
    421         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    422         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
    423         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
    424         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
    425 
    426         // ----------------------------------------------
    427         elemDesc = (ElemDesc) m_elementFlags.get("input");
    428 
    429         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    430         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
    431         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
    432         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    433         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
    434         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
    435 
    436         // ----------------------------------------------
    437         elemDesc = (ElemDesc) m_elementFlags.get("ins");
    438         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
    439 
    440         // ----------------------------------------------
    441         // Netscape 4 extension
    442         elemDesc = (ElemDesc) m_elementFlags.get("layer");
    443         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    444 
    445         // ----------------------------------------------
    446         elemDesc = (ElemDesc) m_elementFlags.get("link");
    447         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
    448 
    449         // ----------------------------------------------
    450         elemDesc = (ElemDesc) m_elementFlags.get("menu");
    451         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    452 
    453         // ----------------------------------------------
    454         elemDesc = (ElemDesc) m_elementFlags.get("object");
    455 
    456         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
    457         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
    458         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
    459         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
    460         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
    461         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
    462 
    463         // ----------------------------------------------
    464         elemDesc = (ElemDesc) m_elementFlags.get("ol");
    465         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    466 
    467         // ----------------------------------------------
    468         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
    469         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    470 
    471         // ----------------------------------------------
    472         elemDesc = (ElemDesc) m_elementFlags.get("option");
    473         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
    474         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    475 
    476         // ----------------------------------------------
    477         elemDesc = (ElemDesc) m_elementFlags.get("q");
    478         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
    479 
    480         // ----------------------------------------------
    481         elemDesc = (ElemDesc) m_elementFlags.get("script");
    482         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
    483         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
    484         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
    485 
    486         // ----------------------------------------------
    487         elemDesc = (ElemDesc) m_elementFlags.get("select");
    488         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    489         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
    490 
    491         // ----------------------------------------------
    492         elemDesc = (ElemDesc) m_elementFlags.get("table");
    493         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
    494 
    495         // ----------------------------------------------
    496         elemDesc = (ElemDesc) m_elementFlags.get("td");
    497         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
    498 
    499         // ----------------------------------------------
    500         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
    501         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
    502         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
    503 
    504         // ----------------------------------------------
    505         elemDesc = (ElemDesc) m_elementFlags.get("th");
    506         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
    507 
    508         // ----------------------------------------------
    509         // The nowrap attribute of a tr element is both
    510         // a Netscape and Internet-Explorer extension
    511         elemDesc = (ElemDesc) m_elementFlags.get("tr");
    512         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
    513 
    514         // ----------------------------------------------
    515         elemDesc = (ElemDesc) m_elementFlags.get("ul");
    516         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
    517     }
    518 
    519     /**
    520      * Dummy element for elements not found.
    521      */
    522     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
    523 
    524     /** True if URLs should be specially escaped with the %xx form. */
    525     private boolean m_specialEscapeURLs = true;
    526 
    527     /** True if the META tag should be omitted. */
    528     private boolean m_omitMetaTag = false;
    529 
    530     /**
    531      * Tells if the formatter should use special URL escaping.
    532      *
    533      * @param bool True if URLs should be specially escaped with the %xx form.
    534      */
    535     public void setSpecialEscapeURLs(boolean bool)
    536     {
    537         m_specialEscapeURLs = bool;
    538     }
    539 
    540     /**
    541      * Tells if the formatter should omit the META tag.
    542      *
    543      * @param bool True if the META tag should be omitted.
    544      */
    545     public void setOmitMetaTag(boolean bool)
    546     {
    547         m_omitMetaTag = bool;
    548     }
    549 
    550     /**
    551      * Specifies an output format for this serializer. It the
    552      * serializer has already been associated with an output format,
    553      * it will switch to the new format. This method should not be
    554      * called while the serializer is in the process of serializing
    555      * a document.
    556      *
    557      * This method can be called multiple times before starting
    558      * the serialization of a particular result-tree. In principle
    559      * all serialization parameters can be changed, with the exception
    560      * of method="html" (it must be method="html" otherwise we
    561      * shouldn't even have a ToHTMLStream object here!)
    562      *
    563      * @param format The output format or serialzation parameters
    564      * to use.
    565      */
    566     public void setOutputFormat(Properties format)
    567     {
    568         /*
    569          * If "format" does not contain the property
    570          * S_USE_URL_ESCAPING, then don't set this value at all,
    571          * just leave as-is rather than explicitly setting it.
    572          */
    573         String value;
    574         value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING);
    575         if (value != null) {
    576             m_specialEscapeURLs =
    577                 OutputPropertyUtils.getBooleanProperty(
    578                     OutputPropertiesFactory.S_USE_URL_ESCAPING,
    579                     format);
    580         }
    581 
    582         /*
    583          * If "format" does not contain the property
    584          * S_OMIT_META_TAG, then don't set this value at all,
    585          * just leave as-is rather than explicitly setting it.
    586          */
    587         value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG);
    588         if (value != null) {
    589            m_omitMetaTag =
    590                 OutputPropertyUtils.getBooleanProperty(
    591                     OutputPropertiesFactory.S_OMIT_META_TAG,
    592                     format);
    593         }
    594 
    595         super.setOutputFormat(format);
    596     }
    597 
    598     /**
    599      * Tells if the formatter should use special URL escaping.
    600      *
    601      * @return True if URLs should be specially escaped with the %xx form.
    602      */
    603     private final boolean getSpecialEscapeURLs()
    604     {
    605         return m_specialEscapeURLs;
    606     }
    607 
    608     /**
    609      * Tells if the formatter should omit the META tag.
    610      *
    611      * @return True if the META tag should be omitted.
    612      */
    613     private final boolean getOmitMetaTag()
    614     {
    615         return m_omitMetaTag;
    616     }
    617 
    618     /**
    619      * Get a description of the given element.
    620      *
    621      * @param name non-null name of element, case insensitive.
    622      *
    623      * @return non-null reference to ElemDesc, which may be m_dummy if no
    624      *         element description matches the given name.
    625      */
    626     public static final ElemDesc getElemDesc(String name)
    627     {
    628         /* this method used to return m_dummy  when name was null
    629          * but now it doesn't check and and requires non-null name.
    630          */
    631         Object obj = m_elementFlags.get(name);
    632         if (null != obj)
    633             return (ElemDesc)obj;
    634         return m_dummy;
    635     }
    636 
    637 
    638     /**
    639      * A Trie that is just a copy of the "static" one.
    640      * We need this one to be able to use the faster, but not thread-safe
    641      * method Trie.get2(name)
    642      */
    643     private Trie m_htmlInfo = new Trie(m_elementFlags);
    644     /**
    645      * Calls to this method could be replaced with calls to
    646      * getElemDesc(name), but this one should be faster.
    647      */
    648     private ElemDesc getElemDesc2(String name)
    649     {
    650         Object obj = m_htmlInfo.get2(name);
    651         if (null != obj)
    652             return (ElemDesc)obj;
    653         return m_dummy;
    654     }
    655 
    /**
     * Creates an HTML serializer with its defaults in place: a fresh
     * namespace mapping table, the HTML character table, and indentation
     * switched on.
     */
    public ToHTMLStream()
    {
        super();

        // initialize namespaces
        m_prefixMap = new NamespaceMappings();

        m_charInfo = m_htmlcharInfo;

        // No output properties have been applied yet at construction time,
        // so establish the right indenting default here.
        m_doIndent = true;
    }
    672 
    673     /** The name of the current element. */
    674 //    private String m_currentElementName = null;
    675 
    676     /**
    677      * Receive notification of the beginning of a document.
    678      *
    679      * @throws org.xml.sax.SAXException Any SAX exception, possibly
    680      *            wrapping another exception.
    681      *
    682      * @throws org.xml.sax.SAXException
    683      */
    684     protected void startDocumentInternal() throws org.xml.sax.SAXException
    685     {
    686         super.startDocumentInternal();
    687 
    688         m_needToCallStartDocument = false;
    689         m_needToOutputDocTypeDecl = true;
    690         m_startNewLine = false;
    691         setOmitXMLDeclaration(true);
    692     }
    693 
    694     /**
    695      * This method should only get called once.
    696      * If a DOCTYPE declaration needs to get written out, it will
    697      * be written out. If it doesn't need to be written out, then
    698      * the call to this method has no effect.
    699      */
    700     private void outputDocTypeDecl(String name) throws SAXException {
    701         if (true == m_needToOutputDocTypeDecl)
    702         {
    703             String doctypeSystem = getDoctypeSystem();
    704             String doctypePublic = getDoctypePublic();
    705             if ((null != doctypeSystem) || (null != doctypePublic))
    706             {
    707                 final java.io.Writer writer = m_writer;
    708                 try
    709                 {
    710                 writer.write("<!DOCTYPE ");
    711                 writer.write(name);
    712 
    713                 if (null != doctypePublic)
    714                 {
    715                     writer.write(" PUBLIC \"");
    716                     writer.write(doctypePublic);
    717                     writer.write('"');
    718                 }
    719 
    720                 if (null != doctypeSystem)
    721                 {
    722                     if (null == doctypePublic)
    723                         writer.write(" SYSTEM \"");
    724                     else
    725                         writer.write(" \"");
    726 
    727                     writer.write(doctypeSystem);
    728                     writer.write('"');
    729                 }
    730 
    731                 writer.write('>');
    732                 outputLineSep();
    733                 }
    734                 catch(IOException e)
    735                 {
    736                     throw new SAXException(e);
    737                 }
    738             }
    739         }
    740 
    741         m_needToOutputDocTypeDecl = false;
    742     }
    743 
    744     /**
    745      * Receive notification of the end of a document.
    746      *
    747      * @throws org.xml.sax.SAXException Any SAX exception, possibly
    748      *            wrapping another exception.
    749      *
    750      * @throws org.xml.sax.SAXException
    751      */
    752     public final void endDocument() throws org.xml.sax.SAXException
    753     {
    754 
    755         flushPending();
    756         if (m_doIndent && !m_isprevtext)
    757         {
    758             try
    759             {
    760             outputLineSep();
    761             }
    762             catch(IOException e)
    763             {
    764                 throw new SAXException(e);
    765             }
    766         }
    767 
    768         flushWriter();
    769         if (m_tracer != null)
    770             super.fireEndDoc();
    771     }
    772 
    773     /**
    774      *  Receive notification of the beginning of an element.
    775      *
    776      *
    777      *  @param namespaceURI
    778      *  @param localName
    779      *  @param name The element type name.
    780      *  @param atts The attributes attached to the element, if any.
    781      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
    782      *             wrapping another exception.
    783      *  @see #endElement
    784      *  @see org.xml.sax.AttributeList
    785      */
    786     public void startElement(
    787         String namespaceURI,
    788         String localName,
    789         String name,
    790         Attributes atts)
    791         throws org.xml.sax.SAXException
    792     {
    793 
    794         ElemContext elemContext = m_elemContext;
    795 
    796         // clean up any pending things first
    797         if (elemContext.m_startTagOpen)
    798         {
    799             closeStartTag();
    800             elemContext.m_startTagOpen = false;
    801         }
    802         else if (m_cdataTagOpen)
    803         {
    804             closeCDATA();
    805             m_cdataTagOpen = false;
    806         }
    807         else if (m_needToCallStartDocument)
    808         {
    809             startDocumentInternal();
    810             m_needToCallStartDocument = false;
    811         }
    812 
    813         if (m_needToOutputDocTypeDecl) {
    814             String n = name;
    815             if (n == null || n.length() == 0) {
    816                 // If the lexical QName is not given
    817                 // use the localName in the DOCTYPE
    818                 n = localName;
    819             }
    820             outputDocTypeDecl(n);
    821         }
    822 
    823 
    824         // if this element has a namespace then treat it like XML
    825         if (null != namespaceURI && namespaceURI.length() > 0)
    826         {
    827             super.startElement(namespaceURI, localName, name, atts);
    828 
    829             return;
    830         }
    831 
    832         try
    833         {
    834             // getElemDesc2(name) is faster than getElemDesc(name)
    835             ElemDesc elemDesc = getElemDesc2(name);
    836             int elemFlags = elemDesc.getFlags();
    837 
    838             // deal with indentation issues first
    839             if (m_doIndent)
    840             {
    841 
    842                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
    843                 if (m_ispreserve)
    844                     m_ispreserve = false;
    845                 else if (
    846                     (null != elemContext.m_elementName)
    847                     && (!m_inBlockElem
    848                         || isBlockElement) /* && !isWhiteSpaceSensitive */
    849                     )
    850                 {
    851                     m_startNewLine = true;
    852 
    853                     indent();
    854 
    855                 }
    856                 m_inBlockElem = !isBlockElement;
    857             }
    858 
    859             // save any attributes for later processing
    860             if (atts != null)
    861                 addAttributes(atts);
    862 
    863             m_isprevtext = false;
    864             final java.io.Writer writer = m_writer;
    865             writer.write('<');
    866             writer.write(name);
    867 
    868 
    869 
    870             if (m_tracer != null)
    871                 firePseudoAttributes();
    872 
    873             if ((elemFlags & ElemDesc.EMPTY) != 0)
    874             {
    875                 // an optimization for elements which are expected
    876                 // to be empty.
    877                 m_elemContext = elemContext.push();
    878                 /* XSLTC sometimes calls namespaceAfterStartElement()
    879                  * so we need to remember the name
    880                  */
    881                 m_elemContext.m_elementName = name;
    882                 m_elemContext.m_elementDesc = elemDesc;
    883                 return;
    884             }
    885             else
    886             {
    887                 elemContext = elemContext.push(namespaceURI,localName,name);
    888                 m_elemContext = elemContext;
    889                 elemContext.m_elementDesc = elemDesc;
    890                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
    891             }
    892 
    893 
    894             if ((elemFlags & ElemDesc.HEADELEM) != 0)
    895             {
    896                 // This is the <HEAD> element, do some special processing
    897                 closeStartTag();
    898                 elemContext.m_startTagOpen = false;
    899                 if (!m_omitMetaTag)
    900                 {
    901                     if (m_doIndent)
    902                         indent();
    903                     writer.write(
    904                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
    905                     String encoding = getEncoding();
    906                     String encode = Encodings.getMimeEncoding(encoding);
    907                     writer.write(encode);
    908                     writer.write("\">");
    909                 }
    910             }
    911         }
    912         catch (IOException e)
    913         {
    914             throw new SAXException(e);
    915         }
    916     }
    917 
    918     /**
    919      *  Receive notification of the end of an element.
    920      *
    921      *
    922      *  @param namespaceURI
    923      *  @param localName
    924      *  @param name The element type name
    925      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
    926      *             wrapping another exception.
    927      */
    928     public final void endElement(
    929         final String namespaceURI,
    930         final String localName,
    931         final String name)
    932         throws org.xml.sax.SAXException
    933     {
    934         // deal with any pending issues
    935         if (m_cdataTagOpen)
    936             closeCDATA();
    937 
    938         // if the element has a namespace, treat it like XML, not HTML
    939         if (null != namespaceURI && namespaceURI.length() > 0)
    940         {
    941             super.endElement(namespaceURI, localName, name);
    942 
    943             return;
    944         }
    945 
    946         try
    947         {
    948 
    949             ElemContext elemContext = m_elemContext;
    950             final ElemDesc elemDesc = elemContext.m_elementDesc;
    951             final int elemFlags = elemDesc.getFlags();
    952             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
    953 
    954             // deal with any indentation issues
    955             if (m_doIndent)
    956             {
    957                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
    958                 boolean shouldIndent = false;
    959 
    960                 if (m_ispreserve)
    961                 {
    962                     m_ispreserve = false;
    963                 }
    964                 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
    965                 {
    966                     m_startNewLine = true;
    967                     shouldIndent = true;
    968                 }
    969                 if (!elemContext.m_startTagOpen && shouldIndent)
    970                     indent(elemContext.m_currentElemDepth - 1);
    971                 m_inBlockElem = !isBlockElement;
    972             }
    973 
    974             final java.io.Writer writer = m_writer;
    975             if (!elemContext.m_startTagOpen)
    976             {
    977                 writer.write("</");
    978                 writer.write(name);
    979                 writer.write('>');
    980             }
    981             else
    982             {
    983                 // the start-tag open when this method was called,
    984                 // so we need to process it now.
    985 
    986                 if (m_tracer != null)
    987                     super.fireStartElem(name);
    988 
    989                 // the starting tag was still open when we received this endElement() call
    990                 // so we need to process any gathered attributes NOW, before they go away.
    991                 int nAttrs = m_attributes.getLength();
    992                 if (nAttrs > 0)
    993                 {
    994                     processAttributes(m_writer, nAttrs);
    995                     // clear attributes object for re-use with next element
    996                     m_attributes.clear();
    997                 }
    998                 if (!elemEmpty)
    999                 {
   1000                     // As per Dave/Paul recommendation 12/06/2000
   1001                     // if (shouldIndent)
   1002                     // writer.write('>');
   1003                     //  indent(m_currentIndent);
   1004 
   1005                     writer.write("></");
   1006                     writer.write(name);
   1007                     writer.write('>');
   1008                 }
   1009                 else
   1010                 {
   1011                     writer.write('>');
   1012                 }
   1013             }
   1014 
   1015             // clean up because the element has ended
   1016             if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
   1017                 m_ispreserve = true;
   1018             m_isprevtext = false;
   1019 
   1020             // fire off the end element event
   1021             if (m_tracer != null)
   1022                 super.fireEndElem(name);
   1023 
   1024             // OPTIMIZE-EMPTY
   1025             if (elemEmpty)
   1026             {
   1027                 // a quick exit if the HTML element had no children.
   1028                 // This block of code can be removed if the corresponding block of code
   1029                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
   1030                 m_elemContext = elemContext.m_prev;
   1031                 return;
   1032             }
   1033 
   1034             // some more clean because the element has ended.
   1035             if (!elemContext.m_startTagOpen)
   1036             {
   1037                 if (m_doIndent && !m_preserves.isEmpty())
   1038                     m_preserves.pop();
   1039             }
   1040             m_elemContext = elemContext.m_prev;
   1041 //            m_isRawStack.pop();
   1042         }
   1043         catch (IOException e)
   1044         {
   1045             throw new SAXException(e);
   1046         }
   1047     }
   1048 
   1049     /**
   1050      * Process an attribute.
   1051      * @param   writer The writer to write the processed output to.
   1052      * @param   name   The name of the attribute.
   1053      * @param   value   The value of the attribute.
   1054      * @param   elemDesc The description of the HTML element
   1055      *           that has this attribute.
   1056      *
   1057      * @throws org.xml.sax.SAXException
   1058      */
   1059     protected void processAttribute(
   1060         java.io.Writer writer,
   1061         String name,
   1062         String value,
   1063         ElemDesc elemDesc)
   1064         throws IOException
   1065     {
   1066         writer.write(' ');
   1067 
   1068         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
   1069             && elemDesc != null
   1070             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
   1071         {
   1072             writer.write(name);
   1073         }
   1074         else
   1075         {
   1076             // %REVIEW% %OPT%
   1077             // Two calls to single-char write may NOT
   1078             // be more efficient than one to string-write...
   1079             writer.write(name);
   1080             writer.write("=\"");
   1081             if (   elemDesc != null
   1082                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
   1083                 writeAttrURI(writer, value, m_specialEscapeURLs);
   1084             else
   1085                 writeAttrString(writer, value, this.getEncoding());
   1086             writer.write('"');
   1087 
   1088         }
   1089     }
   1090 
   1091     /**
   1092      * Tell if a character is an ASCII digit.
   1093      */
   1094     private boolean isASCIIDigit(char c)
   1095     {
   1096         return (c >= '0' && c <= '9');
   1097     }
   1098 
   1099     /**
   1100      * Make an integer into an HH hex value.
   1101      * Does no checking on the size of the input, since this
   1102      * is only meant to be used locally by writeAttrURI.
   1103      *
   1104      * @param i must be a value less than 255.
   1105      *
   1106      * @return should be a two character string.
   1107      */
   1108     private static String makeHHString(int i)
   1109     {
   1110         String s = Integer.toHexString(i).toUpperCase();
   1111         if (s.length() == 1)
   1112         {
   1113             s = "0" + s;
   1114         }
   1115         return s;
   1116     }
   1117 
   1118     /**
   1119     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
   1120     * @param str must be 2 characters long
   1121     *
   1122     * @return true or false
   1123     */
   1124     private boolean isHHSign(String str)
   1125     {
   1126         boolean sign = true;
   1127         try
   1128         {
   1129             char r = (char) Integer.parseInt(str, 16);
   1130         }
   1131         catch (NumberFormatException e)
   1132         {
   1133             sign = false;
   1134         }
   1135         return sign;
   1136     }
   1137 
   1138     /**
   1139      * Write the specified <var>string</var> after substituting non ASCII characters,
   1140      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
   1141      *
   1142      * @param   string      String to convert to XML format.
   1143      * @param doURLEscaping True if we should try to encode as
   1144      *                      per http://www.ietf.org/rfc/rfc2396.txt.
   1145      *
   1146      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
   1147      */
   1148     public void writeAttrURI(
   1149         final java.io.Writer writer, String string, boolean doURLEscaping)
   1150         throws IOException
   1151     {
   1152         // http://www.ietf.org/rfc/rfc2396.txt says:
   1153         // A URI is always in an "escaped" form, since escaping or unescaping a
   1154         // completed URI might change its semantics.  Normally, the only time
   1155         // escape encodings can safely be made is when the URI is being created
   1156         // from its component parts; each component may have its own set of
   1157         // characters that are reserved, so only the mechanism responsible for
   1158         // generating or interpreting that component can determine whether or
   1159         // not escaping a character will change its semantics. Likewise, a URI
   1160         // must be separated into its components before the escaped characters
   1161         // within those components can be safely decoded.
   1162         //
   1163         // ...So we do our best to do limited escaping of the URL, without
   1164         // causing damage.  If the URL is already properly escaped, in theory, this
   1165         // function should not change the string value.
   1166 
   1167         final int end = string.length();
   1168         if (end > m_attrBuff.length)
   1169         {
   1170            m_attrBuff = new char[end*2 + 1];
   1171         }
   1172         string.getChars(0,end, m_attrBuff, 0);
   1173         final char[] chars = m_attrBuff;
   1174 
   1175         int cleanStart = 0;
   1176         int cleanLength = 0;
   1177 
   1178 
   1179         char ch = 0;
   1180         for (int i = 0; i < end; i++)
   1181         {
   1182             ch = chars[i];
   1183 
   1184             if ((ch < 32) || (ch > 126))
   1185             {
   1186                 if (cleanLength > 0)
   1187                 {
   1188                     writer.write(chars, cleanStart, cleanLength);
   1189                     cleanLength = 0;
   1190                 }
   1191                 if (doURLEscaping)
   1192                 {
   1193                     // Encode UTF16 to UTF8.
   1194                     // Reference is Unicode, A Primer, by Tony Graham.
   1195                     // Page 92.
   1196 
   1197                     // Note that Kay doesn't escape 0x20...
   1198                     //  if(ch == 0x20) // Not sure about this... -sb
   1199                     //  {
   1200                     //    writer.write(ch);
   1201                     //  }
   1202                     //  else
   1203                     if (ch <= 0x7F)
   1204                     {
   1205                         writer.write('%');
   1206                         writer.write(makeHHString(ch));
   1207                     }
   1208                     else if (ch <= 0x7FF)
   1209                     {
   1210                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
   1211                         // and set two high bits.
   1212                         int high = (ch >> 6) | 0xC0;
   1213                         int low = (ch & 0x3F) | 0x80;
   1214                         // First 6 bits, + high bit
   1215                         writer.write('%');
   1216                         writer.write(makeHHString(high));
   1217                         writer.write('%');
   1218                         writer.write(makeHHString(low));
   1219                     }
   1220                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
   1221                     {
   1222                         // I'm sure this can be done in 3 instructions, but I choose
   1223                         // to try and do it exactly like it is done in the book, at least
   1224                         // until we are sure this is totally clean.  I don't think performance
   1225                         // is a big issue with this particular function, though I could be
   1226                         // wrong.  Also, the stuff below clearly does more masking than
   1227                         // it needs to do.
   1228 
   1229                         // Clear high 6 bits.
   1230                         int highSurrogate = ((int) ch) & 0x03FF;
   1231 
   1232                         // Middle 4 bits (wwww) + 1
   1233                         // "Note that the value of wwww from the high surrogate bit pattern
   1234                         // is incremented to make the uuuuu bit pattern in the scalar value
   1235                         // so the surrogate pair don't address the BMP."
   1236                         int wwww = ((highSurrogate & 0x03C0) >> 6);
   1237                         int uuuuu = wwww + 1;
   1238 
   1239                         // next 4 bits
   1240                         int zzzz = (highSurrogate & 0x003C) >> 2;
   1241 
   1242                         // low 2 bits
   1243                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
   1244 
   1245                         // Get low surrogate character.
   1246                         ch = chars[++i];
   1247 
   1248                         // Clear high 6 bits.
   1249                         int lowSurrogate = ((int) ch) & 0x03FF;
   1250 
   1251                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
   1252                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
   1253 
   1254                         // bottom 6 bits.
   1255                         int xxxxxx = (lowSurrogate & 0x003F);
   1256 
   1257                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
   1258                         int byte2 =
   1259                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
   1260                         int byte3 = 0x80 | yyyyyy;
   1261                         int byte4 = 0x80 | xxxxxx;
   1262 
   1263                         writer.write('%');
   1264                         writer.write(makeHHString(byte1));
   1265                         writer.write('%');
   1266                         writer.write(makeHHString(byte2));
   1267                         writer.write('%');
   1268                         writer.write(makeHHString(byte3));
   1269                         writer.write('%');
   1270                         writer.write(makeHHString(byte4));
   1271                     }
   1272                     else
   1273                     {
   1274                         int high = (ch >> 12) | 0xE0; // top 4 bits
   1275                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
   1276                         // middle 6 bits
   1277                         int low = (ch & 0x3F) | 0x80;
   1278                         // First 6 bits, + high bit
   1279                         writer.write('%');
   1280                         writer.write(makeHHString(high));
   1281                         writer.write('%');
   1282                         writer.write(makeHHString(middle));
   1283                         writer.write('%');
   1284                         writer.write(makeHHString(low));
   1285                     }
   1286 
   1287                 }
   1288                 else if (escapingNotNeeded(ch))
   1289                 {
   1290                     writer.write(ch);
   1291                 }
   1292                 else
   1293                 {
   1294                     writer.write("&#");
   1295                     writer.write(Integer.toString(ch));
   1296                     writer.write(';');
   1297                 }
   1298                 // In this character range we have first written out any previously accumulated
   1299                 // "clean" characters, then processed the current more complicated character,
   1300                 // which may have incremented "i".
   1301                 // We now we reset the next possible clean character.
   1302                 cleanStart = i + 1;
   1303             }
   1304             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
   1305             // not allowing quotes in the URI proper syntax, nor in the fragment
   1306             // identifier, we believe that it's OK to double escape quotes.
   1307             else if (ch == '"')
   1308             {
   1309                 // If the character is a '%' number number, try to avoid double-escaping.
   1310                 // There is a question if this is legal behavior.
   1311 
   1312                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
   1313                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
   1314 
   1315                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
   1316 
   1317                 // We are no longer escaping '%'
   1318 
   1319                 if (cleanLength > 0)
   1320                 {
   1321                     writer.write(chars, cleanStart, cleanLength);
   1322                     cleanLength = 0;
   1323                 }
   1324 
   1325 
   1326                 // Mike Kay encodes this as &#34;, so he may know something I don't?
   1327                 if (doURLEscaping)
   1328                     writer.write("%22");
   1329                 else
   1330                     writer.write("&quot;"); // we have to escape this, I guess.
   1331 
   1332                 // We have written out any clean characters, then the escaped '%' and now we
   1333                 // We now we reset the next possible clean character.
   1334                 cleanStart = i + 1;
   1335             }
   1336             else if (ch == '&')
   1337             {
   1338                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
   1339                 // instead of "&" to avoid confusion with the beginning of a character
   1340                 // reference (entity reference open delimiter).
   1341                 if (cleanLength > 0)
   1342                 {
   1343                     writer.write(chars, cleanStart, cleanLength);
   1344                     cleanLength = 0;
   1345                 }
   1346                 writer.write("&amp;");
   1347                 cleanStart = i + 1;
   1348             }
   1349             else
   1350             {
   1351                 // no processing for this character, just count how
   1352                 // many characters in a row that we have that need no processing
   1353                 cleanLength++;
   1354             }
   1355         }
   1356 
   1357         // are there any clean characters at the end of the array
   1358         // that we haven't processed yet?
   1359         if (cleanLength > 1)
   1360         {
   1361             // if the whole string can be written out as-is do so
   1362             // otherwise write out the clean chars at the end of the
   1363             // array
   1364             if (cleanStart == 0)
   1365                 writer.write(string);
   1366             else
   1367                 writer.write(chars, cleanStart, cleanLength);
   1368         }
   1369         else if (cleanLength == 1)
   1370         {
   1371             // a little optimization for 1 clean character
   1372             // (we could have let the previous if(...) handle them all)
   1373             writer.write(ch);
   1374         }
   1375     }
   1376 
   1377     /**
   1378      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
   1379      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
   1380      *
   1381      * @param   string      String to convert to XML format.
   1382      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
   1383      *
   1384      * @throws org.xml.sax.SAXException
   1385      */
   1386     public void writeAttrString(
   1387         final java.io.Writer writer, String string, String encoding)
   1388         throws IOException
   1389     {
   1390         final int end = string.length();
   1391         if (end > m_attrBuff.length)
   1392         {
   1393             m_attrBuff = new char[end * 2 + 1];
   1394         }
   1395         string.getChars(0, end, m_attrBuff, 0);
   1396         final char[] chars = m_attrBuff;
   1397 
   1398 
   1399 
   1400         int cleanStart = 0;
   1401         int cleanLength = 0;
   1402 
   1403         char ch = 0;
   1404         for (int i = 0; i < end; i++)
   1405         {
   1406             ch = chars[i];
   1407 
   1408             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
   1409             // System.out.println("ch: "+(int)ch);
   1410             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
   1411             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
   1412             if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
   1413             {
   1414                 cleanLength++;
   1415             }
   1416             else if ('<' == ch || '>' == ch)
   1417             {
   1418                 cleanLength++; // no escaping in this case, as specified in 15.2
   1419             }
   1420             else if (
   1421                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
   1422             {
   1423                 cleanLength++; // no escaping in this case, as specified in 15.2
   1424             }
   1425             else
   1426             {
   1427                 if (cleanLength > 0)
   1428                 {
   1429                     writer.write(chars,cleanStart,cleanLength);
   1430                     cleanLength = 0;
   1431                 }
   1432                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
   1433 
   1434                 if (i != pos)
   1435                 {
   1436                     i = pos - 1;
   1437                 }
   1438                 else
   1439                 {
   1440                     if (Encodings.isHighUTF16Surrogate(ch))
   1441                     {
   1442 
   1443                             writeUTF16Surrogate(ch, chars, i, end);
   1444                             i++; // two input characters processed
   1445                                  // this increments by one and the for()
   1446                                  // loop itself increments by another one.
   1447                     }
   1448 
   1449                     // The next is kind of a hack to keep from escaping in the case
   1450                     // of Shift_JIS and the like.
   1451 
   1452                     /*
   1453                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
   1454                     && (ch != 160))
   1455                     {
   1456                     writer.write(ch);  // no escaping in this case
   1457                     }
   1458                     else
   1459                     */
   1460                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
   1461                     if (null != outputStringForChar)
   1462                     {
   1463                         writer.write(outputStringForChar);
   1464                     }
   1465                     else if (escapingNotNeeded(ch))
   1466                     {
   1467                         writer.write(ch); // no escaping in this case
   1468                     }
   1469                     else
   1470                     {
   1471                         writer.write("&#");
   1472                         writer.write(Integer.toString(ch));
   1473                         writer.write(';');
   1474                     }
   1475                 }
   1476                 cleanStart = i + 1;
   1477             }
   1478         } // end of for()
   1479 
   1480         // are there any clean characters at the end of the array
   1481         // that we haven't processed yet?
   1482         if (cleanLength > 1)
   1483         {
   1484             // if the whole string can be written out as-is do so
   1485             // otherwise write out the clean chars at the end of the
   1486             // array
   1487             if (cleanStart == 0)
   1488                 writer.write(string);
   1489             else
   1490                 writer.write(chars, cleanStart, cleanLength);
   1491         }
   1492         else if (cleanLength == 1)
   1493         {
   1494             // a little optimization for 1 clean character
   1495             // (we could have let the previous if(...) handle them all)
   1496             writer.write(ch);
   1497         }
   1498     }
   1499 
   1500 
   1501 
   1502     /**
   1503      * Receive notification of character data.
   1504      *
   1505      * <p>The Parser will call this method to report each chunk of
   1506      * character data.  SAX parsers may return all contiguous character
   1507      * data in a single chunk, or they may split it into several
   1508      * chunks; however, all of the characters in any single event
   1509      * must come from the same external entity, so that the Locator
   1510      * provides useful information.</p>
   1511      *
   1512      * <p>The application must not attempt to read from the array
   1513      * outside of the specified range.</p>
   1514      *
   1515      * <p>Note that some parsers will report whitespace using the
   1516      * ignorableWhitespace() method rather than this one (validating
   1517      * parsers must do so).</p>
   1518      *
   1519      * @param chars The characters from the XML document.
   1520      * @param start The start position in the array.
   1521      * @param length The number of characters to read from the array.
   1522      * @throws org.xml.sax.SAXException Any SAX exception, possibly
   1523      *            wrapping another exception.
   1524      * @see #ignorableWhitespace
   1525      * @see org.xml.sax.Locator
   1526      *
   1527      * @throws org.xml.sax.SAXException
   1528      */
   1529     public final void characters(char chars[], int start, int length)
   1530         throws org.xml.sax.SAXException
   1531     {
   1532 
   1533         if (m_elemContext.m_isRaw)
   1534         {
   1535             try
   1536             {
   1537                 // Clean up some pending issues.
   1538                 if (m_elemContext.m_startTagOpen)
   1539                 {
   1540                     closeStartTag();
   1541                     m_elemContext.m_startTagOpen = false;
   1542                 }
   1543 
   1544                 m_ispreserve = true;
   1545 
   1546                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
   1547 
   1548                 // time to generate characters event
   1549                 if (m_tracer != null)
   1550                     super.fireCharEvent(chars, start, length);
   1551 
   1552                 return;
   1553             }
   1554             catch (IOException ioe)
   1555             {
   1556                 throw new org.xml.sax.SAXException(
   1557                     Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe);
   1558             }
   1559         }
   1560         else
   1561         {
   1562             super.characters(chars, start, length);
   1563         }
   1564     }
   1565 
   1566     /**
   1567      *  Receive notification of cdata.
   1568      *
   1569      *  <p>The Parser will call this method to report each chunk of
   1570      *  character data.  SAX parsers may return all contiguous character
   1571      *  data in a single chunk, or they may split it into several
   1572      *  chunks; however, all of the characters in any single event
   1573      *  must come from the same external entity, so that the Locator
   1574      *  provides useful information.</p>
   1575      *
   1576      *  <p>The application must not attempt to read from the array
   1577      *  outside of the specified range.</p>
   1578      *
   1579      *  <p>Note that some parsers will report whitespace using the
   1580      *  ignorableWhitespace() method rather than this one (validating
   1581      *  parsers must do so).</p>
   1582      *
   1583      *  @param ch The characters from the XML document.
   1584      *  @param start The start position in the array.
   1585      *  @param length The number of characters to read from the array.
   1586      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
   1587      *             wrapping another exception.
   1588      *  @see #ignorableWhitespace
   1589      *  @see org.xml.sax.Locator
   1590      *
   1591      * @throws org.xml.sax.SAXException
   1592      */
   1593     public final void cdata(char ch[], int start, int length)
   1594         throws org.xml.sax.SAXException
   1595     {
   1596 
   1597         if ((null != m_elemContext.m_elementName)
   1598             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
   1599                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
   1600         {
   1601             try
   1602             {
   1603                 if (m_elemContext.m_startTagOpen)
   1604                 {
   1605                     closeStartTag();
   1606                     m_elemContext.m_startTagOpen = false;
   1607                 }
   1608 
   1609                 m_ispreserve = true;
   1610 
   1611                 if (shouldIndent())
   1612                     indent();
   1613 
   1614                 // writer.write(ch, start, length);
   1615                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
   1616             }
   1617             catch (IOException ioe)
   1618             {
   1619                 throw new org.xml.sax.SAXException(
   1620                     Utils.messages.createMessage(
   1621                         MsgKey.ER_OIERROR,
   1622                         null),
   1623                     ioe);
   1624                 //"IO error", ioe);
   1625             }
   1626         }
   1627         else
   1628         {
   1629             super.cdata(ch, start, length);
   1630         }
   1631     }
   1632 
   1633     /**
   1634      *  Receive notification of a processing instruction.
   1635      *
   1636      *  @param target The processing instruction target.
   1637      *  @param data The processing instruction data, or null if
   1638      *         none was supplied.
   1639      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
   1640      *             wrapping another exception.
   1641      *
   1642      * @throws org.xml.sax.SAXException
   1643      */
   1644     public void processingInstruction(String target, String data)
   1645         throws org.xml.sax.SAXException
   1646     {
   1647 
   1648         // Process any pending starDocument and startElement first.
   1649         flushPending();
   1650 
   1651         // Use a fairly nasty hack to tell if the next node is supposed to be
   1652         // unescaped text.
   1653         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
   1654         {
   1655             startNonEscaping();
   1656         }
   1657         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
   1658         {
   1659             endNonEscaping();
   1660         }
   1661         else
   1662         {
   1663             try
   1664             {
   1665                 // clean up any pending things first
   1666                 if (m_elemContext.m_startTagOpen)
   1667                 {
   1668                     closeStartTag();
   1669                     m_elemContext.m_startTagOpen = false;
   1670                 }
   1671                 else if (m_cdataTagOpen)
   1672                 {
   1673                     closeCDATA();
   1674                 }
   1675                 else if (m_needToCallStartDocument)
   1676                 {
   1677                     startDocumentInternal();
   1678                 }
   1679 
   1680 
   1681             /*
   1682              * Perhaps processing instructions can be written out in HTML before
   1683              * the DOCTYPE, in which case this could be emitted with the
   1684              * startElement call, that knows the name of the document element
   1685              * doing it right.
   1686              */
   1687             if (true == m_needToOutputDocTypeDecl)
   1688                 outputDocTypeDecl("html"); // best guess for the upcoming element
   1689 
   1690 
   1691             if (shouldIndent())
   1692                 indent();
   1693 
   1694             final java.io.Writer writer = m_writer;
   1695             //writer.write("<?" + target);
   1696             writer.write("<?");
   1697             writer.write(target);
   1698 
   1699             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
   1700                 writer.write(' ');
   1701 
   1702             //writer.write(data + ">"); // different from XML
   1703             writer.write(data); // different from XML
   1704             writer.write('>'); // different from XML
   1705 
   1706             // Always output a newline char if not inside of an
   1707             // element. The whitespace is not significant in that
   1708             // case.
   1709             if (m_elemContext.m_currentElemDepth <= 0)
   1710                 outputLineSep();
   1711 
   1712             m_startNewLine = true;
   1713             }
   1714             catch(IOException e)
   1715             {
   1716                 throw new SAXException(e);
   1717             }
   1718         }
   1719 
   1720         // now generate the PI event
   1721         if (m_tracer != null)
   1722             super.fireEscapingEvent(target, data);
   1723      }
   1724 
   1725     /**
   1726      * Receive notivication of a entityReference.
   1727      *
   1728      * @param name non-null reference to entity name string.
   1729      *
   1730      * @throws org.xml.sax.SAXException
   1731      */
   1732     public final void entityReference(String name)
   1733         throws org.xml.sax.SAXException
   1734     {
   1735         try
   1736         {
   1737 
   1738         final java.io.Writer writer = m_writer;
   1739         writer.write('&');
   1740         writer.write(name);
   1741         writer.write(';');
   1742 
   1743         } catch(IOException e)
   1744         {
   1745             throw new SAXException(e);
   1746         }
   1747     }
   1748     /**
   1749      * @see ExtendedContentHandler#endElement(String)
   1750      */
   1751     public final void endElement(String elemName) throws SAXException
   1752     {
   1753         endElement(null, null, elemName);
   1754     }
   1755 
   1756     /**
   1757      * Process the attributes, which means to write out the currently
   1758      * collected attributes to the writer. The attributes are not
   1759      * cleared by this method
   1760      *
   1761      * @param writer the writer to write processed attributes to.
   1762      * @param nAttrs the number of attributes in m_attributes
   1763      * to be processed
   1764      *
   1765      * @throws org.xml.sax.SAXException
   1766      */
   1767     public void processAttributes(java.io.Writer writer, int nAttrs)
   1768         throws IOException,SAXException
   1769     {
   1770             /*
   1771              * process the collected attributes
   1772              */
   1773             for (int i = 0; i < nAttrs; i++)
   1774             {
   1775                 processAttribute(
   1776                     writer,
   1777                     m_attributes.getQName(i),
   1778                     m_attributes.getValue(i),
   1779                     m_elemContext.m_elementDesc);
   1780             }
   1781     }
   1782 
   1783     /**
   1784      * For the enclosing elements starting tag write out out any attributes
   1785      * followed by ">". At this point we also mark if this element is
   1786      * a cdata-section-element.
   1787      *
   1788      *@throws org.xml.sax.SAXException
   1789      */
   1790     protected void closeStartTag() throws SAXException
   1791     {
   1792             try
   1793             {
   1794 
   1795             // finish processing attributes, time to fire off the start element event
   1796             if (m_tracer != null)
   1797                 super.fireStartElem(m_elemContext.m_elementName);
   1798 
   1799             int nAttrs = m_attributes.getLength();
   1800             if (nAttrs>0)
   1801             {
   1802                 processAttributes(m_writer, nAttrs);
   1803                 // clear attributes object for re-use with next element
   1804                 m_attributes.clear();
   1805             }
   1806 
   1807             m_writer.write('>');
   1808 
   1809             /* At this point we have the prefix mappings now, so
   1810              * lets determine if the current element is specified in the cdata-
   1811              * section-elements list.
   1812              */
   1813             if (m_CdataElems != null) // if there are any cdata sections
   1814                 m_elemContext.m_isCdataSection = isCdataSection();
   1815             if (m_doIndent)
   1816             {
   1817                 m_isprevtext = false;
   1818                 m_preserves.push(m_ispreserve);
   1819             }
   1820 
   1821             }
   1822             catch(IOException e)
   1823             {
   1824                 throw new SAXException(e);
   1825             }
   1826     }
   1827 
   1828 
   1829 
   1830         /**
   1831          * This method is used when a prefix/uri namespace mapping
   1832          * is indicated after the element was started with a
   1833          * startElement() and before and endElement().
   1834          * startPrefixMapping(prefix,uri) would be used before the
   1835          * startElement() call.
   1836          * @param uri the URI of the namespace
   1837          * @param prefix the prefix associated with the given URI.
   1838          *
   1839          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
   1840          */
   1841         public void namespaceAfterStartElement(String prefix, String uri)
   1842             throws SAXException
   1843         {
   1844             // hack for XSLTC with finding URI for default namespace
   1845             if (m_elemContext.m_elementURI == null)
   1846             {
   1847                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
   1848                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
   1849                 {
   1850                     // the elements URI is not known yet, and it
   1851                     // doesn't have a prefix, and we are currently
   1852                     // setting the uri for prefix "", so we have
   1853                     // the uri for the element... lets remember it
   1854                     m_elemContext.m_elementURI = uri;
   1855                 }
   1856             }
   1857             startPrefixMapping(prefix,uri,false);
   1858         }
   1859 
   1860     public void startDTD(String name, String publicId, String systemId)
   1861         throws SAXException
   1862     {
   1863         m_inDTD = true;
   1864         super.startDTD(name, publicId, systemId);
   1865     }
   1866 
   1867     /**
   1868      * Report the end of DTD declarations.
   1869      * @throws org.xml.sax.SAXException The application may raise an exception.
   1870      * @see #startDTD
   1871      */
   1872     public void endDTD() throws org.xml.sax.SAXException
   1873     {
   1874         m_inDTD = false;
   1875         /* for ToHTMLStream the DOCTYPE is entirely output in the
   1876          * startDocumentInternal() method, so don't do anything here
   1877          */
   1878     }
   1879     /**
   1880      * This method does nothing.
   1881      */
   1882     public void attributeDecl(
   1883         String eName,
   1884         String aName,
   1885         String type,
   1886         String valueDefault,
   1887         String value)
   1888         throws SAXException
   1889     {
   1890         // The internal DTD subset is not serialized by the ToHTMLStream serializer
   1891     }
   1892 
   1893     /**
   1894      * This method does nothing.
   1895      */
   1896     public void elementDecl(String name, String model) throws SAXException
   1897     {
   1898         // The internal DTD subset is not serialized by the ToHTMLStream serializer
   1899     }
   1900     /**
   1901      * This method does nothing.
   1902      */
   1903     public void internalEntityDecl(String name, String value)
   1904         throws SAXException
   1905     {
   1906         // The internal DTD subset is not serialized by the ToHTMLStream serializer
   1907     }
   1908     /**
   1909      * This method does nothing.
   1910      */
   1911     public void externalEntityDecl(
   1912         String name,
   1913         String publicId,
   1914         String systemId)
   1915         throws SAXException
   1916     {
   1917         // The internal DTD subset is not serialized by the ToHTMLStream serializer
   1918     }
   1919 
   1920     /**
   1921      * This method is used to add an attribute to the currently open element.
   1922      * The caller has guaranted that this attribute is unique, which means that it
   1923      * not been seen before and will not be seen again.
   1924      *
   1925      * @param name the qualified name of the attribute
   1926      * @param value the value of the attribute which can contain only
   1927      * ASCII printable characters characters in the range 32 to 127 inclusive.
   1928      * @param flags the bit values of this integer give optimization information.
   1929      */
   1930     public void addUniqueAttribute(String name, String value, int flags)
   1931         throws SAXException
   1932     {
   1933         try
   1934         {
   1935             final java.io.Writer writer = m_writer;
   1936             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
   1937             {
   1938                 // "flags" has indicated that the characters
   1939                 // '>'  '<'   '&'  and '"' are not in the value and
   1940                 // m_htmlcharInfo has recorded that there are no other
   1941                 // entities in the range 0 to 127 so we write out the
   1942                 // value directly
   1943                 writer.write(' ');
   1944                 writer.write(name);
   1945                 writer.write("=\"");
   1946                 writer.write(value);
   1947                 writer.write('"');
   1948             }
   1949             else if (
   1950                 (flags & HTML_ATTREMPTY) > 0
   1951                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
   1952             {
   1953                 writer.write(' ');
   1954                 writer.write(name);
   1955             }
   1956             else
   1957             {
   1958                 writer.write(' ');
   1959                 writer.write(name);
   1960                 writer.write("=\"");
   1961                 if ((flags & HTML_ATTRURL) > 0)
   1962                 {
   1963                     writeAttrURI(writer, value, m_specialEscapeURLs);
   1964                 }
   1965                 else
   1966                 {
   1967                     writeAttrString(writer, value, this.getEncoding());
   1968                 }
   1969                 writer.write('"');
   1970             }
   1971         } catch (IOException e) {
   1972             throw new SAXException(e);
   1973         }
   1974     }
   1975 
   1976     public void comment(char ch[], int start, int length)
   1977             throws SAXException
   1978     {
   1979         // The internal DTD subset is not serialized by the ToHTMLStream serializer
   1980         if (m_inDTD)
   1981             return;
   1982 
   1983         // Clean up some pending issues, just in case
   1984         // this call is coming right after a startElement()
   1985         // or we are in the middle of writing out CDATA
   1986         // or if a startDocument() call was not received
   1987         if (m_elemContext.m_startTagOpen)
   1988         {
   1989             closeStartTag();
   1990             m_elemContext.m_startTagOpen = false;
   1991         }
   1992         else if (m_cdataTagOpen)
   1993         {
   1994             closeCDATA();
   1995         }
   1996         else if (m_needToCallStartDocument)
   1997         {
   1998             startDocumentInternal();
   1999         }
   2000 
   2001         /*
   2002          * Perhaps comments can be written out in HTML before the DOCTYPE.
   2003          * In this case we might delete this call to writeOutDOCTYPE, and
   2004          * it would be handled within the startElement() call.
   2005          */
   2006         if (m_needToOutputDocTypeDecl)
   2007             outputDocTypeDecl("html"); // best guess for the upcoming element
   2008 
   2009         super.comment(ch, start, length);
   2010     }
   2011 
   2012     public boolean reset()
   2013     {
   2014         boolean ret = super.reset();
   2015         if (!ret)
   2016             return false;
   2017         resetToHTMLStream();
   2018         return true;
   2019     }
   2020 
   2021     private void resetToHTMLStream()
   2022     {
   2023         // m_htmlcharInfo remains unchanged
   2024         // m_htmlInfo = null;  // Don't reset
   2025         m_inBlockElem = false;
   2026         m_inDTD = false;
   2027         m_omitMetaTag = false;
   2028         m_specialEscapeURLs = true;
   2029     }
   2030 
   2031     static class Trie
   2032     {
   2033         /**
   2034          * A digital search trie for 7-bit ASCII text
   2035          * The API is a subset of java.util.Hashtable
   2036          * The key must be a 7-bit ASCII string
   2037          * The value may be any Java Object
   2038          * One can get an object stored in a trie from its key,
   2039          * but the search is either case sensitive or case
   2040          * insensitive to the characters in the key, and this
   2041          * choice of sensitivity or insensitivity is made when
   2042          * the Trie is created, before any objects are put in it.
   2043          *
   2044          * This class is a copy of the one in org.apache.xml.utils.
         * It exists to cut the serializer's dependency on that package.
   2046          *
   2047          * @xsl.usage internal
   2048          */
   2049 
   2050         /** Size of the m_nextChar array.  */
   2051         public static final int ALPHA_SIZE = 128;
   2052 
   2053         /** The root node of the tree.    */
   2054         final Node m_Root;
   2055 
   2056         /** helper buffer to convert Strings to char arrays */
   2057         private char[] m_charBuffer = new char[0];
   2058 
   2059         /** true if the search for an object is lower case only with the key */
   2060         private final boolean m_lowerCaseOnly;
   2061 
   2062         /**
   2063          * Construct the trie that has a case insensitive search.
   2064          */
   2065         public Trie()
   2066         {
   2067             m_Root = new Node();
   2068             m_lowerCaseOnly = false;
   2069         }
   2070 
   2071         /**
   2072          * Construct the trie given the desired case sensitivity with the key.
   2073          * @param lowerCaseOnly true if the search keys are to be loser case only,
   2074          * not case insensitive.
   2075          */
   2076         public Trie(boolean lowerCaseOnly)
   2077         {
   2078             m_Root = new Node();
   2079             m_lowerCaseOnly = lowerCaseOnly;
   2080         }
   2081 
   2082         /**
   2083          * Put an object into the trie for lookup.
   2084          *
   2085          * @param key must be a 7-bit ASCII string
   2086          * @param value any java object.
   2087          *
   2088          * @return The old object that matched key, or null.
   2089          */
   2090         public Object put(String key, Object value)
   2091         {
   2092 
   2093             final int len = key.length();
   2094             if (len > m_charBuffer.length)
   2095             {
   2096                 // make the biggest buffer ever needed in get(String)
   2097                 m_charBuffer = new char[len];
   2098             }
   2099 
   2100             Node node = m_Root;
   2101 
   2102             for (int i = 0; i < len; i++)
   2103             {
   2104                 Node nextNode =
   2105                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
   2106 
   2107                 if (nextNode != null)
   2108                 {
   2109                     node = nextNode;
   2110                 }
   2111                 else
   2112                 {
   2113                     for (; i < len; i++)
   2114                     {
   2115                         Node newNode = new Node();
   2116                         if (m_lowerCaseOnly)
   2117                         {
   2118                             // put this value into the tree only with a lower case key
   2119                             node.m_nextChar[Character.toLowerCase(
   2120                                 key.charAt(i))] =
   2121                                 newNode;
   2122                         }
   2123                         else
   2124                         {
   2125                             // put this value into the tree with a case insensitive key
   2126                             node.m_nextChar[Character.toUpperCase(
   2127                                 key.charAt(i))] =
   2128                                 newNode;
   2129                             node.m_nextChar[Character.toLowerCase(
   2130                                 key.charAt(i))] =
   2131                                 newNode;
   2132                         }
   2133                         node = newNode;
   2134                     }
   2135                     break;
   2136                 }
   2137             }
   2138 
   2139             Object ret = node.m_Value;
   2140 
   2141             node.m_Value = value;
   2142 
   2143             return ret;
   2144         }
   2145 
   2146         /**
   2147          * Get an object that matches the key.
   2148          *
   2149          * @param key must be a 7-bit ASCII string
   2150          *
   2151          * @return The object that matches the key, or null.
   2152          */
   2153         public Object get(final String key)
   2154         {
   2155 
   2156             final int len = key.length();
   2157 
   2158             /* If the name is too long, we won't find it, this also keeps us
   2159              * from overflowing m_charBuffer
   2160              */
   2161             if (m_charBuffer.length < len)
   2162                 return null;
   2163 
   2164             Node node = m_Root;
   2165             switch (len) // optimize the look up based on the number of chars
   2166             {
   2167                 // case 0 looks silly, but the generated bytecode runs
   2168                 // faster for lookup of elements of length 2 with this in
   2169                 // and a fair bit faster.  Don't know why.
   2170                 case 0 :
   2171                     {
   2172                         return null;
   2173                     }
   2174 
   2175                 case 1 :
   2176                     {
   2177                         final char ch = key.charAt(0);
   2178                         if (ch < ALPHA_SIZE)
   2179                         {
   2180                             node = node.m_nextChar[ch];
   2181                             if (node != null)
   2182                                 return node.m_Value;
   2183                         }
   2184                         return null;
   2185                     }
   2186                     //                comment out case 2 because the default is faster
   2187                     //                case 2 :
   2188                     //                    {
   2189                     //                        final char ch0 = key.charAt(0);
   2190                     //                        final char ch1 = key.charAt(1);
   2191                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
   2192                     //                        {
   2193                     //                            node = node.m_nextChar[ch0];
   2194                     //                            if (node != null)
   2195                     //                            {
   2196                     //
   2197                     //                                if (ch1 < ALPHA_SIZE)
   2198                     //                                {
   2199                     //                                    node = node.m_nextChar[ch1];
   2200                     //                                    if (node != null)
   2201                     //                                        return node.m_Value;
   2202                     //                                }
   2203                     //                            }
   2204                     //                        }
   2205                     //                        return null;
   2206                     //                   }
   2207                 default :
   2208                     {
   2209                         for (int i = 0; i < len; i++)
   2210                         {
   2211                             // A thread-safe way to loop over the characters
   2212                             final char ch = key.charAt(i);
   2213                             if (ALPHA_SIZE <= ch)
   2214                             {
   2215                                 // the key is not 7-bit ASCII so we won't find it here
   2216                                 return null;
   2217                             }
   2218 
   2219                             node = node.m_nextChar[ch];
   2220                             if (node == null)
   2221                                 return null;
   2222                         }
   2223 
   2224                         return node.m_Value;
   2225                     }
   2226             }
   2227         }
   2228 
   2229         /**
   2230          * The node representation for the trie.
   2231          * @xsl.usage internal
   2232          */
   2233         private class Node
   2234         {
   2235 
   2236             /**
   2237              * Constructor, creates a Node[ALPHA_SIZE].
   2238              */
   2239             Node()
   2240             {
   2241                 m_nextChar = new Node[ALPHA_SIZE];
   2242                 m_Value = null;
   2243             }
   2244 
   2245             /** The next nodes.   */
   2246             final Node m_nextChar[];
   2247 
   2248             /** The value.   */
   2249             Object m_Value;
   2250         }
   2251         /**
   2252          * Construct the trie from another Trie.
   2253          * Both the existing Trie and this new one share the same table for
   2254          * lookup, and it is assumed that the table is fully populated and
   2255          * not changing anymore.
   2256          *
   2257          * @param existingTrie the Trie that this one is a copy of.
   2258          */
   2259         public Trie(Trie existingTrie)
   2260         {
   2261             // copy some fields from the existing Trie into this one.
   2262             m_Root = existingTrie.m_Root;
   2263             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
   2264 
   2265             // get a buffer just big enough to hold the longest key in the table.
   2266             int max = existingTrie.getLongestKeyLength();
   2267             m_charBuffer = new char[max];
   2268         }
   2269 
   2270         /**
   2271          * Get an object that matches the key.
   2272          * This method is faster than get(), but is not thread-safe.
   2273          *
   2274          * @param key must be a 7-bit ASCII string
   2275          *
   2276          * @return The object that matches the key, or null.
   2277          */
   2278         public Object get2(final String key)
   2279         {
   2280 
   2281             final int len = key.length();
   2282 
   2283             /* If the name is too long, we won't find it, this also keeps us
   2284              * from overflowing m_charBuffer
   2285              */
   2286             if (m_charBuffer.length < len)
   2287                 return null;
   2288 
   2289             Node node = m_Root;
   2290             switch (len) // optimize the look up based on the number of chars
   2291             {
   2292                 // case 0 looks silly, but the generated bytecode runs
   2293                 // faster for lookup of elements of length 2 with this in
   2294                 // and a fair bit faster.  Don't know why.
   2295                 case 0 :
   2296                     {
   2297                         return null;
   2298                     }
   2299 
   2300                 case 1 :
   2301                     {
   2302                         final char ch = key.charAt(0);
   2303                         if (ch < ALPHA_SIZE)
   2304                         {
   2305                             node = node.m_nextChar[ch];
   2306                             if (node != null)
   2307                                 return node.m_Value;
   2308                         }
   2309                         return null;
   2310                     }
   2311                 default :
   2312                     {
   2313                         /* Copy string into array. This is not thread-safe because
   2314                          * it modifies the contents of m_charBuffer. If multiple
   2315                          * threads were to use this Trie they all would be
   2316                          * using this same array (not good). So this
   2317                          * method is not thread-safe, but it is faster because
   2318                          * converting to a char[] and looping over elements of
   2319                          * the array is faster than a String's charAt(i).
   2320                          */
   2321                         key.getChars(0, len, m_charBuffer, 0);
   2322 
   2323                         for (int i = 0; i < len; i++)
   2324                         {
   2325                             final char ch = m_charBuffer[i];
   2326                             if (ALPHA_SIZE <= ch)
   2327                             {
   2328                                 // the key is not 7-bit ASCII so we won't find it here
   2329                                 return null;
   2330                             }
   2331 
   2332                             node = node.m_nextChar[ch];
   2333                             if (node == null)
   2334                                 return null;
   2335                         }
   2336 
   2337                         return node.m_Value;
   2338                     }
   2339             }
   2340         }
   2341 
   2342         /**
   2343          * Get the length of the longest key used in the table.
   2344          */
   2345         public int getLongestKeyLength()
   2346         {
   2347             return m_charBuffer.length;
   2348         }
   2349     }
   2350 }
   2351