1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: ToHTMLStream.java 468654 2006-10-28 07:09:23Z minchau $ 20 */ 21 package org.apache.xml.serializer; 22 23 import java.io.IOException; 24 import java.util.Properties; 25 26 import javax.xml.transform.Result; 27 28 import org.apache.xml.serializer.utils.MsgKey; 29 import org.apache.xml.serializer.utils.Utils; 30 import org.xml.sax.Attributes; 31 import org.xml.sax.SAXException; 32 33 /** 34 * This serializer takes a series of SAX or 35 * SAX-like events and writes its output 36 * to the given stream. 37 * 38 * This class is not a public API, it is public 39 * because it is used from another package. 40 * 41 * @xsl.usage internal 42 */ 43 public class ToHTMLStream extends ToStream 44 { 45 46 /** This flag is set while receiving events from the DTD */ 47 protected boolean m_inDTD = false; 48 49 /** True if the current element is a block element. (seems like 50 * this needs to be a stack. -sb). */ 51 private boolean m_inBlockElem = false; 52 53 /** 54 * Map that tells which XML characters should have special treatment, and it 55 * provides character to entity name lookup. 
*/
    private final CharInfo m_htmlcharInfo =
        CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

    /** A digital search trie for fast, case insensitive lookup of ElemDesc objects. */
    static final Trie m_elementFlags = new Trie();

    static {
        initTagReference(m_elementFlags);
    }

    /**
     * Stores a freshly created ElemDesc, built from the given flags, under
     * the given element name.  Every element gets its own ElemDesc instance
     * because attribute flags are later set on it individually.
     */
    private static void registerHtmlElement(Trie trie, String elementName, int flags) {
        trie.put(elementName, new ElemDesc(flags));
    }

    /**
     * Fetches the ElemDesc already stored for an element and sets the given
     * flag on one of its attributes.  The element must have been registered
     * before; otherwise the lookup yields null and this call fails.
     */
    private static void flagHtmlAttribute(
        Trie trie, String elementName, String attrName, int attrFlag) {
        ((ElemDesc) trie.get(elementName)).setAttr(attrName, attrFlag);
    }

    /**
     * Fills the given trie with the descriptions of the known HTML elements
     * and then marks which of their attributes hold URLs (ATTRURL) or may be
     * written in minimized form (ATTREMPTY).
     *
     * Elements are registered under upper-case names and looked up again
     * under lower-case names, which relies on the case insensitive lookup of
     * the trie.
     *
     * @param m_elementFlags the trie to populate.
     */
    static void initTagReference(Trie m_elementFlags) {
        final Trie trie = m_elementFlags;

        // Flag combinations shared by several elements.
        final int block = ElemDesc.BLOCK;
        final int emptyBlock = ElemDesc.EMPTY | ElemDesc.BLOCK;
        final int fontStyle = ElemDesc.FONTSTYLE;
        final int phrase = ElemDesc.PHRASE;
        final int inlineSpecial = ElemDesc.SPECIAL | ElemDesc.ASPECIAL;
        final int blockForm =
            ElemDesc.BLOCK | ElemDesc.BLOCKFORM | ElemDesc.BLOCKFORMFIELDSET;
        final int heading = ElemDesc.HEAD | ElemDesc.BLOCK;
        final int list = ElemDesc.LIST | ElemDesc.BLOCK;
        final int formControl = ElemDesc.FORMCTRL | ElemDesc.INLINELABEL;
        final int headMiscEmptyBlock =
            ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK;

        // HTML 4.0 loose DTD
        registerHtmlElement(trie, "BASEFONT", ElemDesc.EMPTY);
        registerHtmlElement(trie, "FRAME", emptyBlock);
        registerHtmlElement(trie, "FRAMESET", block);
        registerHtmlElement(trie, "NOFRAMES", block);
        registerHtmlElement(trie, "ISINDEX", emptyBlock);
        registerHtmlElement(trie, "APPLET", ElemDesc.WHITESPACESENSITIVE);
        registerHtmlElement(trie, "CENTER", block);
        registerHtmlElement(trie, "DIR", block);
        registerHtmlElement(trie, "MENU", block);

        // HTML 4.0 strict DTD
        registerHtmlElement(trie, "TT", fontStyle);
        registerHtmlElement(trie, "I", fontStyle);
        registerHtmlElement(trie, "B", fontStyle);
        registerHtmlElement(trie, "BIG", fontStyle);
        registerHtmlElement(trie, "SMALL", fontStyle);
        registerHtmlElement(trie, "EM", phrase);
        registerHtmlElement(trie, "STRONG", phrase);
        registerHtmlElement(trie, "DFN", phrase);
        registerHtmlElement(trie, "CODE", phrase);
        registerHtmlElement(trie, "SAMP", phrase);
        registerHtmlElement(trie, "KBD", phrase);
        registerHtmlElement(trie, "VAR", phrase);
        registerHtmlElement(trie, "CITE", phrase);
        registerHtmlElement(trie, "ABBR", phrase);
        registerHtmlElement(trie, "ACRONYM", phrase);
        registerHtmlElement(trie, "SUP", inlineSpecial);
        registerHtmlElement(trie, "SUB", inlineSpecial);
        registerHtmlElement(trie, "SPAN", inlineSpecial);
        registerHtmlElement(trie, "BDO", inlineSpecial);
        registerHtmlElement(trie, "BR", inlineSpecial | ElemDesc.EMPTY | ElemDesc.BLOCK);
        registerHtmlElement(trie, "BODY", block);
        registerHtmlElement(trie, "ADDRESS", blockForm);
        registerHtmlElement(trie, "DIV", blockForm);
        registerHtmlElement(trie, "A", ElemDesc.SPECIAL);
        registerHtmlElement(trie, "MAP", inlineSpecial | ElemDesc.BLOCK);
        registerHtmlElement(trie, "AREA", emptyBlock);
        registerHtmlElement(trie, "LINK", headMiscEmptyBlock);
        registerHtmlElement(
            trie,
            "IMG",
            inlineSpecial | ElemDesc.EMPTY | ElemDesc.WHITESPACESENSITIVE);
        registerHtmlElement(
            trie,
            "OBJECT",
            inlineSpecial | ElemDesc.HEADMISC | ElemDesc.WHITESPACESENSITIVE);
        registerHtmlElement(trie, "PARAM", ElemDesc.EMPTY);
        registerHtmlElement(trie, "HR", blockForm | ElemDesc.EMPTY);
        registerHtmlElement(trie, "P", blockForm);
        registerHtmlElement(trie, "H1", heading);
        registerHtmlElement(trie, "H2", heading);
        registerHtmlElement(trie, "H3", heading);
        registerHtmlElement(trie, "H4", heading);
        registerHtmlElement(trie, "H5", heading);
        registerHtmlElement(trie, "H6", heading);
        registerHtmlElement(trie, "PRE", ElemDesc.PREFORMATTED | ElemDesc.BLOCK);
        registerHtmlElement(trie, "Q", inlineSpecial);
        registerHtmlElement(trie, "BLOCKQUOTE", blockForm);
        registerHtmlElement(trie, "INS", 0);
        registerHtmlElement(trie, "DEL", 0);
        registerHtmlElement(trie, "DL", blockForm);
        registerHtmlElement(trie, "DT", block);
        registerHtmlElement(trie, "DD", block);
        registerHtmlElement(trie, "OL", list);
        registerHtmlElement(trie, "UL", list);
        registerHtmlElement(trie, "LI", block);
        registerHtmlElement(trie, "FORM", block);
        registerHtmlElement(trie, "LABEL", ElemDesc.FORMCTRL);
        registerHtmlElement(trie, "INPUT", formControl | ElemDesc.EMPTY);
        registerHtmlElement(trie, "SELECT", formControl);
        registerHtmlElement(trie, "OPTGROUP", 0);
        registerHtmlElement(trie, "OPTION", 0);
        registerHtmlElement(trie, "TEXTAREA", formControl);
        registerHtmlElement(trie, "FIELDSET", ElemDesc.BLOCK | ElemDesc.BLOCKFORM);
        registerHtmlElement(trie, "LEGEND", 0);
        registerHtmlElement(trie, "BUTTON", formControl);
        registerHtmlElement(trie, "TABLE", blockForm);
        registerHtmlElement(trie, "CAPTION", block);
        registerHtmlElement(trie, "THEAD", block);
        registerHtmlElement(trie, "TFOOT", block);
        registerHtmlElement(trie, "TBODY", block);
        registerHtmlElement(trie, "COLGROUP", block);
        registerHtmlElement(trie, "COL", emptyBlock);
        registerHtmlElement(trie, "TR", block);
        registerHtmlElement(trie, "TH", 0);
        registerHtmlElement(trie, "TD", 0);
        registerHtmlElement(trie, "HEAD", ElemDesc.BLOCK | ElemDesc.HEADELEM);
        registerHtmlElement(trie, "TITLE", block);
        registerHtmlElement(trie, "BASE", emptyBlock);
        registerHtmlElement(trie, "META", headMiscEmptyBlock);
        registerHtmlElement(
            trie,
            "STYLE",
            ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK);
        registerHtmlElement(
            trie,
            "SCRIPT",
            inlineSpecial | ElemDesc.HEADMISC | ElemDesc.RAW);
        registerHtmlElement(trie, "NOSCRIPT", blockForm);
        registerHtmlElement(trie, "HTML", ElemDesc.BLOCK | ElemDesc.HTMLELEM);

        // From "John Ky" <hand (at) syd.speednet.com.au
        // Transitional Document Type Definition (loose DTD, basefont section)
        registerHtmlElement(trie, "FONT", fontStyle);

        // Presentational elements S, STRIKE and U
        registerHtmlElement(trie, "S", fontStyle);
        registerHtmlElement(trie, "STRIKE", fontStyle);
        registerHtmlElement(trie, "U", fontStyle);

        // From "John Ky" <hand (at) syd.speednet.com.au
        registerHtmlElement(trie, "NOBR", fontStyle);

        // HTML 4.0, section 16.5
        registerHtmlElement(trie, "IFRAME", blockForm);

        // Netscape 4 extensions
        registerHtmlElement(trie, "LAYER", blockForm);
        registerHtmlElement(trie, "ILAYER", blockForm);

        // NOW FOR ATTRIBUTE INFORMATION . . .
        final int url = ElemDesc.ATTRURL;
        final int minimizable = ElemDesc.ATTREMPTY;

        flagHtmlAttribute(trie, "a", "HREF", url);
        flagHtmlAttribute(trie, "a", "NAME", url);

        flagHtmlAttribute(trie, "area", "HREF", url);
        flagHtmlAttribute(trie, "area", "NOHREF", minimizable);

        flagHtmlAttribute(trie, "base", "HREF", url);

        flagHtmlAttribute(trie, "button", "DISABLED", minimizable);

        flagHtmlAttribute(trie, "blockquote", "CITE", url);

        flagHtmlAttribute(trie, "del", "CITE", url);

        flagHtmlAttribute(trie, "dir", "COMPACT", minimizable);

        flagHtmlAttribute(trie, "div", "SRC", url); // Netscape 4 extension
        flagHtmlAttribute(trie, "div", "NOWRAP", minimizable); // Internet-Explorer extension

        flagHtmlAttribute(trie, "dl", "COMPACT", minimizable);

        flagHtmlAttribute(trie, "form", "ACTION", url);

        // Attribution to: "Voytenko, Dimitry" <DVoytenko (at) SECTORBASE.COM>
        flagHtmlAttribute(trie, "frame", "SRC", url);
        flagHtmlAttribute(trie, "frame", "LONGDESC", url);
        flagHtmlAttribute(trie, "frame", "NORESIZE", minimizable);

        flagHtmlAttribute(trie, "head", "PROFILE", url);

        flagHtmlAttribute(trie, "hr", "NOSHADE", minimizable);

        // HTML 4.0, section 16.5
        flagHtmlAttribute(trie, "iframe", "SRC", url);
        flagHtmlAttribute(trie, "iframe", "LONGDESC", url);

        // Netscape 4 extension
        flagHtmlAttribute(trie, "ilayer", "SRC", url);

        flagHtmlAttribute(trie, "img", "SRC", url);
        flagHtmlAttribute(trie, "img", "LONGDESC", url);
        flagHtmlAttribute(trie, "img", "USEMAP", url);
        flagHtmlAttribute(trie, "img", "ISMAP", minimizable);

        flagHtmlAttribute(trie, "input", "SRC", url);
        flagHtmlAttribute(trie, "input", "USEMAP", url);
        flagHtmlAttribute(trie, "input", "CHECKED", minimizable);
        flagHtmlAttribute(trie, "input", "DISABLED", minimizable);
        flagHtmlAttribute(trie, "input", "ISMAP", minimizable);
        flagHtmlAttribute(trie, "input", "READONLY", minimizable);

        flagHtmlAttribute(trie, "ins", "CITE", url);

        // Netscape 4 extension
        flagHtmlAttribute(trie, "layer", "SRC", url);

        flagHtmlAttribute(trie, "link", "HREF", url);

        flagHtmlAttribute(trie, "menu", "COMPACT", minimizable);

        flagHtmlAttribute(trie, "object", "CLASSID", url);
        flagHtmlAttribute(trie, "object", "CODEBASE", url);
        flagHtmlAttribute(trie, "object", "DATA", url);
        flagHtmlAttribute(trie, "object", "ARCHIVE", url);
        flagHtmlAttribute(trie, "object", "USEMAP", url);
        flagHtmlAttribute(trie, "object", "DECLARE", minimizable);

        flagHtmlAttribute(trie, "ol", "COMPACT", minimizable);

        flagHtmlAttribute(trie, "optgroup", "DISABLED", minimizable);

        flagHtmlAttribute(trie, "option", "SELECTED", minimizable);
        flagHtmlAttribute(trie, "option", "DISABLED", minimizable);

        flagHtmlAttribute(trie, "q", "CITE", url);

        flagHtmlAttribute(trie, "script", "SRC", url);
        flagHtmlAttribute(trie, "script", "FOR", url);
        flagHtmlAttribute(trie, "script", "DEFER", minimizable);

        flagHtmlAttribute(trie, "select", "DISABLED", minimizable);
        flagHtmlAttribute(trie, "select", "MULTIPLE", minimizable);

        flagHtmlAttribute(trie, "table", "NOWRAP", minimizable); // Internet-Explorer extension

        flagHtmlAttribute(trie, "td", "NOWRAP", minimizable);

        flagHtmlAttribute(trie, "textarea", "DISABLED", minimizable);
        flagHtmlAttribute(trie, "textarea", "READONLY", minimizable);

        flagHtmlAttribute(trie, "th", "NOWRAP", minimizable);

        // The nowrap attribute of a tr element is both
        // a Netscape and Internet-Explorer extension
        flagHtmlAttribute(trie, "tr", "NOWRAP", minimizable);

        flagHtmlAttribute(trie, "ul", "COMPACT", minimizable);
    }

    /**
     * Dummy element description handed out for element names that have no
     * entry of their own; it carries only the BLOCK flag.
     */
    private static final ElemDesc m_dummy = new ElemDesc(ElemDesc.BLOCK);

    /** True if URLs should be specially escaped with the %xx form. */
    private boolean m_specialEscapeURLs = true;

    /** True if the META tag should be omitted. */
    private boolean m_omitMetaTag = false;

    /**
     * Tells if the formatter should use special URL escaping.
     *
     * @param bool True if URLs should be specially escaped with the %xx form.
     */
    public void setSpecialEscapeURLs(boolean bool) {
        m_specialEscapeURLs = bool;
    }

    /**
     * Tells if the formatter should omit the META tag.
     *
     * @param bool True if the META tag should be omitted.
544 */ 545 public void setOmitMetaTag(boolean bool) 546 { 547 m_omitMetaTag = bool; 548 } 549 550 /** 551 * Specifies an output format for this serializer. It the 552 * serializer has already been associated with an output format, 553 * it will switch to the new format. This method should not be 554 * called while the serializer is in the process of serializing 555 * a document. 556 * 557 * This method can be called multiple times before starting 558 * the serialization of a particular result-tree. In principle 559 * all serialization parameters can be changed, with the exception 560 * of method="html" (it must be method="html" otherwise we 561 * shouldn't even have a ToHTMLStream object here!) 562 * 563 * @param format The output format or serialzation parameters 564 * to use. 565 */ 566 public void setOutputFormat(Properties format) 567 { 568 /* 569 * If "format" does not contain the property 570 * S_USE_URL_ESCAPING, then don't set this value at all, 571 * just leave as-is rather than explicitly setting it. 572 */ 573 String value; 574 value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING); 575 if (value != null) { 576 m_specialEscapeURLs = 577 OutputPropertyUtils.getBooleanProperty( 578 OutputPropertiesFactory.S_USE_URL_ESCAPING, 579 format); 580 } 581 582 /* 583 * If "format" does not contain the property 584 * S_OMIT_META_TAG, then don't set this value at all, 585 * just leave as-is rather than explicitly setting it. 586 */ 587 value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG); 588 if (value != null) { 589 m_omitMetaTag = 590 OutputPropertyUtils.getBooleanProperty( 591 OutputPropertiesFactory.S_OMIT_META_TAG, 592 format); 593 } 594 595 super.setOutputFormat(format); 596 } 597 598 /** 599 * Tells if the formatter should use special URL escaping. 600 * 601 * @return True if URLs should be specially escaped with the %xx form. 
602 */ 603 private final boolean getSpecialEscapeURLs() 604 { 605 return m_specialEscapeURLs; 606 } 607 608 /** 609 * Tells if the formatter should omit the META tag. 610 * 611 * @return True if the META tag should be omitted. 612 */ 613 private final boolean getOmitMetaTag() 614 { 615 return m_omitMetaTag; 616 } 617 618 /** 619 * Get a description of the given element. 620 * 621 * @param name non-null name of element, case insensitive. 622 * 623 * @return non-null reference to ElemDesc, which may be m_dummy if no 624 * element description matches the given name. 625 */ 626 public static final ElemDesc getElemDesc(String name) 627 { 628 /* this method used to return m_dummy when name was null 629 * but now it doesn't check and and requires non-null name. 630 */ 631 Object obj = m_elementFlags.get(name); 632 if (null != obj) 633 return (ElemDesc)obj; 634 return m_dummy; 635 } 636 637 638 /** 639 * A Trie that is just a copy of the "static" one. 640 * We need this one to be able to use the faster, but not thread-safe 641 * method Trie.get2(name) 642 */ 643 private Trie m_htmlInfo = new Trie(m_elementFlags); 644 /** 645 * Calls to this method could be replaced with calls to 646 * getElemDesc(name), but this one should be faster. 647 */ 648 private ElemDesc getElemDesc2(String name) 649 { 650 Object obj = m_htmlInfo.get2(name); 651 if (null != obj) 652 return (ElemDesc)obj; 653 return m_dummy; 654 } 655 656 /** 657 * Default constructor. 658 */ 659 public ToHTMLStream() 660 { 661 662 super(); 663 // we are just constructing this thing, no output properties 664 // have been used, so we will set the right default for 665 // indenting anyways 666 m_doIndent = true; 667 m_charInfo = m_htmlcharInfo; 668 // initialize namespaces 669 m_prefixMap = new NamespaceMappings(); 670 671 } 672 673 /** The name of the current element. */ 674 // private String m_currentElementName = null; 675 676 /** 677 * Receive notification of the beginning of a document. 
678 * 679 * @throws org.xml.sax.SAXException Any SAX exception, possibly 680 * wrapping another exception. 681 * 682 * @throws org.xml.sax.SAXException 683 */ 684 protected void startDocumentInternal() throws org.xml.sax.SAXException 685 { 686 super.startDocumentInternal(); 687 688 m_needToCallStartDocument = false; 689 m_needToOutputDocTypeDecl = true; 690 m_startNewLine = false; 691 setOmitXMLDeclaration(true); 692 } 693 694 /** 695 * This method should only get called once. 696 * If a DOCTYPE declaration needs to get written out, it will 697 * be written out. If it doesn't need to be written out, then 698 * the call to this method has no effect. 699 */ 700 private void outputDocTypeDecl(String name) throws SAXException { 701 if (true == m_needToOutputDocTypeDecl) 702 { 703 String doctypeSystem = getDoctypeSystem(); 704 String doctypePublic = getDoctypePublic(); 705 if ((null != doctypeSystem) || (null != doctypePublic)) 706 { 707 final java.io.Writer writer = m_writer; 708 try 709 { 710 writer.write("<!DOCTYPE "); 711 writer.write(name); 712 713 if (null != doctypePublic) 714 { 715 writer.write(" PUBLIC \""); 716 writer.write(doctypePublic); 717 writer.write('"'); 718 } 719 720 if (null != doctypeSystem) 721 { 722 if (null == doctypePublic) 723 writer.write(" SYSTEM \""); 724 else 725 writer.write(" \""); 726 727 writer.write(doctypeSystem); 728 writer.write('"'); 729 } 730 731 writer.write('>'); 732 outputLineSep(); 733 } 734 catch(IOException e) 735 { 736 throw new SAXException(e); 737 } 738 } 739 } 740 741 m_needToOutputDocTypeDecl = false; 742 } 743 744 /** 745 * Receive notification of the end of a document. 746 * 747 * @throws org.xml.sax.SAXException Any SAX exception, possibly 748 * wrapping another exception. 
749 * 750 * @throws org.xml.sax.SAXException 751 */ 752 public final void endDocument() throws org.xml.sax.SAXException 753 { 754 755 flushPending(); 756 if (m_doIndent && !m_isprevtext) 757 { 758 try 759 { 760 outputLineSep(); 761 } 762 catch(IOException e) 763 { 764 throw new SAXException(e); 765 } 766 } 767 768 flushWriter(); 769 if (m_tracer != null) 770 super.fireEndDoc(); 771 } 772 773 /** 774 * Receive notification of the beginning of an element. 775 * 776 * 777 * @param namespaceURI 778 * @param localName 779 * @param name The element type name. 780 * @param atts The attributes attached to the element, if any. 781 * @throws org.xml.sax.SAXException Any SAX exception, possibly 782 * wrapping another exception. 783 * @see #endElement 784 * @see org.xml.sax.AttributeList 785 */ 786 public void startElement( 787 String namespaceURI, 788 String localName, 789 String name, 790 Attributes atts) 791 throws org.xml.sax.SAXException 792 { 793 794 ElemContext elemContext = m_elemContext; 795 796 // clean up any pending things first 797 if (elemContext.m_startTagOpen) 798 { 799 closeStartTag(); 800 elemContext.m_startTagOpen = false; 801 } 802 else if (m_cdataTagOpen) 803 { 804 closeCDATA(); 805 m_cdataTagOpen = false; 806 } 807 else if (m_needToCallStartDocument) 808 { 809 startDocumentInternal(); 810 m_needToCallStartDocument = false; 811 } 812 813 if (m_needToOutputDocTypeDecl) { 814 String n = name; 815 if (n == null || n.length() == 0) { 816 // If the lexical QName is not given 817 // use the localName in the DOCTYPE 818 n = localName; 819 } 820 outputDocTypeDecl(n); 821 } 822 823 824 // if this element has a namespace then treat it like XML 825 if (null != namespaceURI && namespaceURI.length() > 0) 826 { 827 super.startElement(namespaceURI, localName, name, atts); 828 829 return; 830 } 831 832 try 833 { 834 // getElemDesc2(name) is faster than getElemDesc(name) 835 ElemDesc elemDesc = getElemDesc2(name); 836 int elemFlags = elemDesc.getFlags(); 837 838 // deal 
with indentation issues first 839 if (m_doIndent) 840 { 841 842 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0; 843 if (m_ispreserve) 844 m_ispreserve = false; 845 else if ( 846 (null != elemContext.m_elementName) 847 && (!m_inBlockElem 848 || isBlockElement) /* && !isWhiteSpaceSensitive */ 849 ) 850 { 851 m_startNewLine = true; 852 853 indent(); 854 855 } 856 m_inBlockElem = !isBlockElement; 857 } 858 859 // save any attributes for later processing 860 if (atts != null) 861 addAttributes(atts); 862 863 m_isprevtext = false; 864 final java.io.Writer writer = m_writer; 865 writer.write('<'); 866 writer.write(name); 867 868 869 870 if (m_tracer != null) 871 firePseudoAttributes(); 872 873 if ((elemFlags & ElemDesc.EMPTY) != 0) 874 { 875 // an optimization for elements which are expected 876 // to be empty. 877 m_elemContext = elemContext.push(); 878 /* XSLTC sometimes calls namespaceAfterStartElement() 879 * so we need to remember the name 880 */ 881 m_elemContext.m_elementName = name; 882 m_elemContext.m_elementDesc = elemDesc; 883 return; 884 } 885 else 886 { 887 elemContext = elemContext.push(namespaceURI,localName,name); 888 m_elemContext = elemContext; 889 elemContext.m_elementDesc = elemDesc; 890 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0; 891 } 892 893 894 if ((elemFlags & ElemDesc.HEADELEM) != 0) 895 { 896 // This is the <HEAD> element, do some special processing 897 closeStartTag(); 898 elemContext.m_startTagOpen = false; 899 if (!m_omitMetaTag) 900 { 901 if (m_doIndent) 902 indent(); 903 writer.write( 904 "<META http-equiv=\"Content-Type\" content=\"text/html; charset="); 905 String encoding = getEncoding(); 906 String encode = Encodings.getMimeEncoding(encoding); 907 writer.write(encode); 908 writer.write("\">"); 909 } 910 } 911 } 912 catch (IOException e) 913 { 914 throw new SAXException(e); 915 } 916 } 917 918 /** 919 * Receive notification of the end of an element. 
920 * 921 * 922 * @param namespaceURI 923 * @param localName 924 * @param name The element type name 925 * @throws org.xml.sax.SAXException Any SAX exception, possibly 926 * wrapping another exception. 927 */ 928 public final void endElement( 929 final String namespaceURI, 930 final String localName, 931 final String name) 932 throws org.xml.sax.SAXException 933 { 934 // deal with any pending issues 935 if (m_cdataTagOpen) 936 closeCDATA(); 937 938 // if the element has a namespace, treat it like XML, not HTML 939 if (null != namespaceURI && namespaceURI.length() > 0) 940 { 941 super.endElement(namespaceURI, localName, name); 942 943 return; 944 } 945 946 try 947 { 948 949 ElemContext elemContext = m_elemContext; 950 final ElemDesc elemDesc = elemContext.m_elementDesc; 951 final int elemFlags = elemDesc.getFlags(); 952 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0; 953 954 // deal with any indentation issues 955 if (m_doIndent) 956 { 957 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0; 958 boolean shouldIndent = false; 959 960 if (m_ispreserve) 961 { 962 m_ispreserve = false; 963 } 964 else if (m_doIndent && (!m_inBlockElem || isBlockElement)) 965 { 966 m_startNewLine = true; 967 shouldIndent = true; 968 } 969 if (!elemContext.m_startTagOpen && shouldIndent) 970 indent(elemContext.m_currentElemDepth - 1); 971 m_inBlockElem = !isBlockElement; 972 } 973 974 final java.io.Writer writer = m_writer; 975 if (!elemContext.m_startTagOpen) 976 { 977 writer.write("</"); 978 writer.write(name); 979 writer.write('>'); 980 } 981 else 982 { 983 // the start-tag open when this method was called, 984 // so we need to process it now. 985 986 if (m_tracer != null) 987 super.fireStartElem(name); 988 989 // the starting tag was still open when we received this endElement() call 990 // so we need to process any gathered attributes NOW, before they go away. 
991 int nAttrs = m_attributes.getLength(); 992 if (nAttrs > 0) 993 { 994 processAttributes(m_writer, nAttrs); 995 // clear attributes object for re-use with next element 996 m_attributes.clear(); 997 } 998 if (!elemEmpty) 999 { 1000 // As per Dave/Paul recommendation 12/06/2000 1001 // if (shouldIndent) 1002 // writer.write('>'); 1003 // indent(m_currentIndent); 1004 1005 writer.write("></"); 1006 writer.write(name); 1007 writer.write('>'); 1008 } 1009 else 1010 { 1011 writer.write('>'); 1012 } 1013 } 1014 1015 // clean up because the element has ended 1016 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0) 1017 m_ispreserve = true; 1018 m_isprevtext = false; 1019 1020 // fire off the end element event 1021 if (m_tracer != null) 1022 super.fireEndElem(name); 1023 1024 // OPTIMIZE-EMPTY 1025 if (elemEmpty) 1026 { 1027 // a quick exit if the HTML element had no children. 1028 // This block of code can be removed if the corresponding block of code 1029 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed 1030 m_elemContext = elemContext.m_prev; 1031 return; 1032 } 1033 1034 // some more clean because the element has ended. 1035 if (!elemContext.m_startTagOpen) 1036 { 1037 if (m_doIndent && !m_preserves.isEmpty()) 1038 m_preserves.pop(); 1039 } 1040 m_elemContext = elemContext.m_prev; 1041 // m_isRawStack.pop(); 1042 } 1043 catch (IOException e) 1044 { 1045 throw new SAXException(e); 1046 } 1047 } 1048 1049 /** 1050 * Process an attribute. 1051 * @param writer The writer to write the processed output to. 1052 * @param name The name of the attribute. 1053 * @param value The value of the attribute. 1054 * @param elemDesc The description of the HTML element 1055 * that has this attribute. 
1056 * 1057 * @throws org.xml.sax.SAXException 1058 */ 1059 protected void processAttribute( 1060 java.io.Writer writer, 1061 String name, 1062 String value, 1063 ElemDesc elemDesc) 1064 throws IOException 1065 { 1066 writer.write(' '); 1067 1068 if ( ((value.length() == 0) || value.equalsIgnoreCase(name)) 1069 && elemDesc != null 1070 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY)) 1071 { 1072 writer.write(name); 1073 } 1074 else 1075 { 1076 // %REVIEW% %OPT% 1077 // Two calls to single-char write may NOT 1078 // be more efficient than one to string-write... 1079 writer.write(name); 1080 writer.write("=\""); 1081 if ( elemDesc != null 1082 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL)) 1083 writeAttrURI(writer, value, m_specialEscapeURLs); 1084 else 1085 writeAttrString(writer, value, this.getEncoding()); 1086 writer.write('"'); 1087 1088 } 1089 } 1090 1091 /** 1092 * Tell if a character is an ASCII digit. 1093 */ 1094 private boolean isASCIIDigit(char c) 1095 { 1096 return (c >= '0' && c <= '9'); 1097 } 1098 1099 /** 1100 * Make an integer into an HH hex value. 1101 * Does no checking on the size of the input, since this 1102 * is only meant to be used locally by writeAttrURI. 1103 * 1104 * @param i must be a value less than 255. 1105 * 1106 * @return should be a two character string. 1107 */ 1108 private static String makeHHString(int i) 1109 { 1110 String s = Integer.toHexString(i).toUpperCase(); 1111 if (s.length() == 1) 1112 { 1113 s = "0" + s; 1114 } 1115 return s; 1116 } 1117 1118 /** 1119 * Dmitri Ilyin: Makes sure if the String is HH encoded sign. 
1120 * @param str must be 2 characters long 1121 * 1122 * @return true or false 1123 */ 1124 private boolean isHHSign(String str) 1125 { 1126 boolean sign = true; 1127 try 1128 { 1129 char r = (char) Integer.parseInt(str, 16); 1130 } 1131 catch (NumberFormatException e) 1132 { 1133 sign = false; 1134 } 1135 return sign; 1136 } 1137 1138 /** 1139 * Write the specified <var>string</var> after substituting non ASCII characters, 1140 * with <CODE>%HH</CODE>, where HH is the hex of the byte value. 1141 * 1142 * @param string String to convert to XML format. 1143 * @param doURLEscaping True if we should try to encode as 1144 * per http://www.ietf.org/rfc/rfc2396.txt. 1145 * 1146 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected. 1147 */ 1148 public void writeAttrURI( 1149 final java.io.Writer writer, String string, boolean doURLEscaping) 1150 throws IOException 1151 { 1152 // http://www.ietf.org/rfc/rfc2396.txt says: 1153 // A URI is always in an "escaped" form, since escaping or unescaping a 1154 // completed URI might change its semantics. Normally, the only time 1155 // escape encodings can safely be made is when the URI is being created 1156 // from its component parts; each component may have its own set of 1157 // characters that are reserved, so only the mechanism responsible for 1158 // generating or interpreting that component can determine whether or 1159 // not escaping a character will change its semantics. Likewise, a URI 1160 // must be separated into its components before the escaped characters 1161 // within those components can be safely decoded. 1162 // 1163 // ...So we do our best to do limited escaping of the URL, without 1164 // causing damage. If the URL is already properly escaped, in theory, this 1165 // function should not change the string value. 
1166 1167 final int end = string.length(); 1168 if (end > m_attrBuff.length) 1169 { 1170 m_attrBuff = new char[end*2 + 1]; 1171 } 1172 string.getChars(0,end, m_attrBuff, 0); 1173 final char[] chars = m_attrBuff; 1174 1175 int cleanStart = 0; 1176 int cleanLength = 0; 1177 1178 1179 char ch = 0; 1180 for (int i = 0; i < end; i++) 1181 { 1182 ch = chars[i]; 1183 1184 if ((ch < 32) || (ch > 126)) 1185 { 1186 if (cleanLength > 0) 1187 { 1188 writer.write(chars, cleanStart, cleanLength); 1189 cleanLength = 0; 1190 } 1191 if (doURLEscaping) 1192 { 1193 // Encode UTF16 to UTF8. 1194 // Reference is Unicode, A Primer, by Tony Graham. 1195 // Page 92. 1196 1197 // Note that Kay doesn't escape 0x20... 1198 // if(ch == 0x20) // Not sure about this... -sb 1199 // { 1200 // writer.write(ch); 1201 // } 1202 // else 1203 if (ch <= 0x7F) 1204 { 1205 writer.write('%'); 1206 writer.write(makeHHString(ch)); 1207 } 1208 else if (ch <= 0x7FF) 1209 { 1210 // Clear low 6 bits before rotate, put high 4 bits in low byte, 1211 // and set two high bits. 1212 int high = (ch >> 6) | 0xC0; 1213 int low = (ch & 0x3F) | 0x80; 1214 // First 6 bits, + high bit 1215 writer.write('%'); 1216 writer.write(makeHHString(high)); 1217 writer.write('%'); 1218 writer.write(makeHHString(low)); 1219 } 1220 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate 1221 { 1222 // I'm sure this can be done in 3 instructions, but I choose 1223 // to try and do it exactly like it is done in the book, at least 1224 // until we are sure this is totally clean. I don't think performance 1225 // is a big issue with this particular function, though I could be 1226 // wrong. Also, the stuff below clearly does more masking than 1227 // it needs to do. 1228 1229 // Clear high 6 bits. 
1230 int highSurrogate = ((int) ch) & 0x03FF; 1231 1232 // Middle 4 bits (wwww) + 1 1233 // "Note that the value of wwww from the high surrogate bit pattern 1234 // is incremented to make the uuuuu bit pattern in the scalar value 1235 // so the surrogate pair don't address the BMP." 1236 int wwww = ((highSurrogate & 0x03C0) >> 6); 1237 int uuuuu = wwww + 1; 1238 1239 // next 4 bits 1240 int zzzz = (highSurrogate & 0x003C) >> 2; 1241 1242 // low 2 bits 1243 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30; 1244 1245 // Get low surrogate character. 1246 ch = chars[++i]; 1247 1248 // Clear high 6 bits. 1249 int lowSurrogate = ((int) ch) & 0x03FF; 1250 1251 // put the middle 4 bits into the bottom of yyyyyy (byte 3) 1252 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6); 1253 1254 // bottom 6 bits. 1255 int xxxxxx = (lowSurrogate & 0x003F); 1256 1257 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu 1258 int byte2 = 1259 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz; 1260 int byte3 = 0x80 | yyyyyy; 1261 int byte4 = 0x80 | xxxxxx; 1262 1263 writer.write('%'); 1264 writer.write(makeHHString(byte1)); 1265 writer.write('%'); 1266 writer.write(makeHHString(byte2)); 1267 writer.write('%'); 1268 writer.write(makeHHString(byte3)); 1269 writer.write('%'); 1270 writer.write(makeHHString(byte4)); 1271 } 1272 else 1273 { 1274 int high = (ch >> 12) | 0xE0; // top 4 bits 1275 int middle = ((ch & 0x0FC0) >> 6) | 0x80; 1276 // middle 6 bits 1277 int low = (ch & 0x3F) | 0x80; 1278 // First 6 bits, + high bit 1279 writer.write('%'); 1280 writer.write(makeHHString(high)); 1281 writer.write('%'); 1282 writer.write(makeHHString(middle)); 1283 writer.write('%'); 1284 writer.write(makeHHString(low)); 1285 } 1286 1287 } 1288 else if (escapingNotNeeded(ch)) 1289 { 1290 writer.write(ch); 1291 } 1292 else 1293 { 1294 writer.write("&#"); 1295 writer.write(Integer.toString(ch)); 1296 writer.write(';'); 1297 } 1298 // In this character range we have first written out any previously 
accumulated 1299 // "clean" characters, then processed the current more complicated character, 1300 // which may have incremented "i". 1301 // We now we reset the next possible clean character. 1302 cleanStart = i + 1; 1303 } 1304 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as 1305 // not allowing quotes in the URI proper syntax, nor in the fragment 1306 // identifier, we believe that it's OK to double escape quotes. 1307 else if (ch == '"') 1308 { 1309 // If the character is a '%' number number, try to avoid double-escaping. 1310 // There is a question if this is legal behavior. 1311 1312 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded 1313 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little. 1314 1315 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) ) 1316 1317 // We are no longer escaping '%' 1318 1319 if (cleanLength > 0) 1320 { 1321 writer.write(chars, cleanStart, cleanLength); 1322 cleanLength = 0; 1323 } 1324 1325 1326 // Mike Kay encodes this as ", so he may know something I don't? 1327 if (doURLEscaping) 1328 writer.write("%22"); 1329 else 1330 writer.write("""); // we have to escape this, I guess. 1331 1332 // We have written out any clean characters, then the escaped '%' and now we 1333 // We now we reset the next possible clean character. 1334 cleanStart = i + 1; 1335 } 1336 else if (ch == '&') 1337 { 1338 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38) 1339 // instead of "&" to avoid confusion with the beginning of a character 1340 // reference (entity reference open delimiter). 
1341 if (cleanLength > 0) 1342 { 1343 writer.write(chars, cleanStart, cleanLength); 1344 cleanLength = 0; 1345 } 1346 writer.write("&"); 1347 cleanStart = i + 1; 1348 } 1349 else 1350 { 1351 // no processing for this character, just count how 1352 // many characters in a row that we have that need no processing 1353 cleanLength++; 1354 } 1355 } 1356 1357 // are there any clean characters at the end of the array 1358 // that we haven't processed yet? 1359 if (cleanLength > 1) 1360 { 1361 // if the whole string can be written out as-is do so 1362 // otherwise write out the clean chars at the end of the 1363 // array 1364 if (cleanStart == 0) 1365 writer.write(string); 1366 else 1367 writer.write(chars, cleanStart, cleanLength); 1368 } 1369 else if (cleanLength == 1) 1370 { 1371 // a little optimization for 1 clean character 1372 // (we could have let the previous if(...) handle them all) 1373 writer.write(ch); 1374 } 1375 } 1376 1377 /** 1378 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>, 1379 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>. 1380 * 1381 * @param string String to convert to XML format. 1382 * @param encoding CURRENTLY NOT IMPLEMENTED. 
1383 * 1384 * @throws org.xml.sax.SAXException 1385 */ 1386 public void writeAttrString( 1387 final java.io.Writer writer, String string, String encoding) 1388 throws IOException 1389 { 1390 final int end = string.length(); 1391 if (end > m_attrBuff.length) 1392 { 1393 m_attrBuff = new char[end * 2 + 1]; 1394 } 1395 string.getChars(0, end, m_attrBuff, 0); 1396 final char[] chars = m_attrBuff; 1397 1398 1399 1400 int cleanStart = 0; 1401 int cleanLength = 0; 1402 1403 char ch = 0; 1404 for (int i = 0; i < end; i++) 1405 { 1406 ch = chars[i]; 1407 1408 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE); 1409 // System.out.println("ch: "+(int)ch); 1410 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter); 1411 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]); 1412 if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch))) 1413 { 1414 cleanLength++; 1415 } 1416 else if ('<' == ch || '>' == ch) 1417 { 1418 cleanLength++; // no escaping in this case, as specified in 15.2 1419 } 1420 else if ( 1421 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1])) 1422 { 1423 cleanLength++; // no escaping in this case, as specified in 15.2 1424 } 1425 else 1426 { 1427 if (cleanLength > 0) 1428 { 1429 writer.write(chars,cleanStart,cleanLength); 1430 cleanLength = 0; 1431 } 1432 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true); 1433 1434 if (i != pos) 1435 { 1436 i = pos - 1; 1437 } 1438 else 1439 { 1440 if (Encodings.isHighUTF16Surrogate(ch)) 1441 { 1442 1443 writeUTF16Surrogate(ch, chars, i, end); 1444 i++; // two input characters processed 1445 // this increments by one and the for() 1446 // loop itself increments by another one. 1447 } 1448 1449 // The next is kind of a hack to keep from escaping in the case 1450 // of Shift_JIS and the like. 
1451 1452 /* 1453 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF) 1454 && (ch != 160)) 1455 { 1456 writer.write(ch); // no escaping in this case 1457 } 1458 else 1459 */ 1460 String outputStringForChar = m_charInfo.getOutputStringForChar(ch); 1461 if (null != outputStringForChar) 1462 { 1463 writer.write(outputStringForChar); 1464 } 1465 else if (escapingNotNeeded(ch)) 1466 { 1467 writer.write(ch); // no escaping in this case 1468 } 1469 else 1470 { 1471 writer.write("&#"); 1472 writer.write(Integer.toString(ch)); 1473 writer.write(';'); 1474 } 1475 } 1476 cleanStart = i + 1; 1477 } 1478 } // end of for() 1479 1480 // are there any clean characters at the end of the array 1481 // that we haven't processed yet? 1482 if (cleanLength > 1) 1483 { 1484 // if the whole string can be written out as-is do so 1485 // otherwise write out the clean chars at the end of the 1486 // array 1487 if (cleanStart == 0) 1488 writer.write(string); 1489 else 1490 writer.write(chars, cleanStart, cleanLength); 1491 } 1492 else if (cleanLength == 1) 1493 { 1494 // a little optimization for 1 clean character 1495 // (we could have let the previous if(...) handle them all) 1496 writer.write(ch); 1497 } 1498 } 1499 1500 1501 1502 /** 1503 * Receive notification of character data. 1504 * 1505 * <p>The Parser will call this method to report each chunk of 1506 * character data. 
SAX parsers may return all contiguous character 1507 * data in a single chunk, or they may split it into several 1508 * chunks; however, all of the characters in any single event 1509 * must come from the same external entity, so that the Locator 1510 * provides useful information.</p> 1511 * 1512 * <p>The application must not attempt to read from the array 1513 * outside of the specified range.</p> 1514 * 1515 * <p>Note that some parsers will report whitespace using the 1516 * ignorableWhitespace() method rather than this one (validating 1517 * parsers must do so).</p> 1518 * 1519 * @param chars The characters from the XML document. 1520 * @param start The start position in the array. 1521 * @param length The number of characters to read from the array. 1522 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1523 * wrapping another exception. 1524 * @see #ignorableWhitespace 1525 * @see org.xml.sax.Locator 1526 * 1527 * @throws org.xml.sax.SAXException 1528 */ 1529 public final void characters(char chars[], int start, int length) 1530 throws org.xml.sax.SAXException 1531 { 1532 1533 if (m_elemContext.m_isRaw) 1534 { 1535 try 1536 { 1537 // Clean up some pending issues. 1538 if (m_elemContext.m_startTagOpen) 1539 { 1540 closeStartTag(); 1541 m_elemContext.m_startTagOpen = false; 1542 } 1543 1544 m_ispreserve = true; 1545 1546 writeNormalizedChars(chars, start, length, false, m_lineSepUse); 1547 1548 // time to generate characters event 1549 if (m_tracer != null) 1550 super.fireCharEvent(chars, start, length); 1551 1552 return; 1553 } 1554 catch (IOException ioe) 1555 { 1556 throw new org.xml.sax.SAXException( 1557 Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe); 1558 } 1559 } 1560 else 1561 { 1562 super.characters(chars, start, length); 1563 } 1564 } 1565 1566 /** 1567 * Receive notification of cdata. 1568 * 1569 * <p>The Parser will call this method to report each chunk of 1570 * character data. 
SAX parsers may return all contiguous character 1571 * data in a single chunk, or they may split it into several 1572 * chunks; however, all of the characters in any single event 1573 * must come from the same external entity, so that the Locator 1574 * provides useful information.</p> 1575 * 1576 * <p>The application must not attempt to read from the array 1577 * outside of the specified range.</p> 1578 * 1579 * <p>Note that some parsers will report whitespace using the 1580 * ignorableWhitespace() method rather than this one (validating 1581 * parsers must do so).</p> 1582 * 1583 * @param ch The characters from the XML document. 1584 * @param start The start position in the array. 1585 * @param length The number of characters to read from the array. 1586 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1587 * wrapping another exception. 1588 * @see #ignorableWhitespace 1589 * @see org.xml.sax.Locator 1590 * 1591 * @throws org.xml.sax.SAXException 1592 */ 1593 public final void cdata(char ch[], int start, int length) 1594 throws org.xml.sax.SAXException 1595 { 1596 1597 if ((null != m_elemContext.m_elementName) 1598 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT") 1599 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE"))) 1600 { 1601 try 1602 { 1603 if (m_elemContext.m_startTagOpen) 1604 { 1605 closeStartTag(); 1606 m_elemContext.m_startTagOpen = false; 1607 } 1608 1609 m_ispreserve = true; 1610 1611 if (shouldIndent()) 1612 indent(); 1613 1614 // writer.write(ch, start, length); 1615 writeNormalizedChars(ch, start, length, true, m_lineSepUse); 1616 } 1617 catch (IOException ioe) 1618 { 1619 throw new org.xml.sax.SAXException( 1620 Utils.messages.createMessage( 1621 MsgKey.ER_OIERROR, 1622 null), 1623 ioe); 1624 //"IO error", ioe); 1625 } 1626 } 1627 else 1628 { 1629 super.cdata(ch, start, length); 1630 } 1631 } 1632 1633 /** 1634 * Receive notification of a processing instruction. 
1635 * 1636 * @param target The processing instruction target. 1637 * @param data The processing instruction data, or null if 1638 * none was supplied. 1639 * @throws org.xml.sax.SAXException Any SAX exception, possibly 1640 * wrapping another exception. 1641 * 1642 * @throws org.xml.sax.SAXException 1643 */ 1644 public void processingInstruction(String target, String data) 1645 throws org.xml.sax.SAXException 1646 { 1647 1648 // Process any pending starDocument and startElement first. 1649 flushPending(); 1650 1651 // Use a fairly nasty hack to tell if the next node is supposed to be 1652 // unescaped text. 1653 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING)) 1654 { 1655 startNonEscaping(); 1656 } 1657 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING)) 1658 { 1659 endNonEscaping(); 1660 } 1661 else 1662 { 1663 try 1664 { 1665 // clean up any pending things first 1666 if (m_elemContext.m_startTagOpen) 1667 { 1668 closeStartTag(); 1669 m_elemContext.m_startTagOpen = false; 1670 } 1671 else if (m_cdataTagOpen) 1672 { 1673 closeCDATA(); 1674 } 1675 else if (m_needToCallStartDocument) 1676 { 1677 startDocumentInternal(); 1678 } 1679 1680 1681 /* 1682 * Perhaps processing instructions can be written out in HTML before 1683 * the DOCTYPE, in which case this could be emitted with the 1684 * startElement call, that knows the name of the document element 1685 * doing it right. 1686 */ 1687 if (true == m_needToOutputDocTypeDecl) 1688 outputDocTypeDecl("html"); // best guess for the upcoming element 1689 1690 1691 if (shouldIndent()) 1692 indent(); 1693 1694 final java.io.Writer writer = m_writer; 1695 //writer.write("<?" 
+ target); 1696 writer.write("<?"); 1697 writer.write(target); 1698 1699 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0))) 1700 writer.write(' '); 1701 1702 //writer.write(data + ">"); // different from XML 1703 writer.write(data); // different from XML 1704 writer.write('>'); // different from XML 1705 1706 // Always output a newline char if not inside of an 1707 // element. The whitespace is not significant in that 1708 // case. 1709 if (m_elemContext.m_currentElemDepth <= 0) 1710 outputLineSep(); 1711 1712 m_startNewLine = true; 1713 } 1714 catch(IOException e) 1715 { 1716 throw new SAXException(e); 1717 } 1718 } 1719 1720 // now generate the PI event 1721 if (m_tracer != null) 1722 super.fireEscapingEvent(target, data); 1723 } 1724 1725 /** 1726 * Receive notivication of a entityReference. 1727 * 1728 * @param name non-null reference to entity name string. 1729 * 1730 * @throws org.xml.sax.SAXException 1731 */ 1732 public final void entityReference(String name) 1733 throws org.xml.sax.SAXException 1734 { 1735 try 1736 { 1737 1738 final java.io.Writer writer = m_writer; 1739 writer.write('&'); 1740 writer.write(name); 1741 writer.write(';'); 1742 1743 } catch(IOException e) 1744 { 1745 throw new SAXException(e); 1746 } 1747 } 1748 /** 1749 * @see ExtendedContentHandler#endElement(String) 1750 */ 1751 public final void endElement(String elemName) throws SAXException 1752 { 1753 endElement(null, null, elemName); 1754 } 1755 1756 /** 1757 * Process the attributes, which means to write out the currently 1758 * collected attributes to the writer. The attributes are not 1759 * cleared by this method 1760 * 1761 * @param writer the writer to write processed attributes to. 
1762 * @param nAttrs the number of attributes in m_attributes 1763 * to be processed 1764 * 1765 * @throws org.xml.sax.SAXException 1766 */ 1767 public void processAttributes(java.io.Writer writer, int nAttrs) 1768 throws IOException,SAXException 1769 { 1770 /* 1771 * process the collected attributes 1772 */ 1773 for (int i = 0; i < nAttrs; i++) 1774 { 1775 processAttribute( 1776 writer, 1777 m_attributes.getQName(i), 1778 m_attributes.getValue(i), 1779 m_elemContext.m_elementDesc); 1780 } 1781 } 1782 1783 /** 1784 * For the enclosing elements starting tag write out out any attributes 1785 * followed by ">". At this point we also mark if this element is 1786 * a cdata-section-element. 1787 * 1788 *@throws org.xml.sax.SAXException 1789 */ 1790 protected void closeStartTag() throws SAXException 1791 { 1792 try 1793 { 1794 1795 // finish processing attributes, time to fire off the start element event 1796 if (m_tracer != null) 1797 super.fireStartElem(m_elemContext.m_elementName); 1798 1799 int nAttrs = m_attributes.getLength(); 1800 if (nAttrs>0) 1801 { 1802 processAttributes(m_writer, nAttrs); 1803 // clear attributes object for re-use with next element 1804 m_attributes.clear(); 1805 } 1806 1807 m_writer.write('>'); 1808 1809 /* At this point we have the prefix mappings now, so 1810 * lets determine if the current element is specified in the cdata- 1811 * section-elements list. 1812 */ 1813 if (m_CdataElems != null) // if there are any cdata sections 1814 m_elemContext.m_isCdataSection = isCdataSection(); 1815 if (m_doIndent) 1816 { 1817 m_isprevtext = false; 1818 m_preserves.push(m_ispreserve); 1819 } 1820 1821 } 1822 catch(IOException e) 1823 { 1824 throw new SAXException(e); 1825 } 1826 } 1827 1828 1829 1830 /** 1831 * This method is used when a prefix/uri namespace mapping 1832 * is indicated after the element was started with a 1833 * startElement() and before and endElement(). 
1834 * startPrefixMapping(prefix,uri) would be used before the 1835 * startElement() call. 1836 * @param uri the URI of the namespace 1837 * @param prefix the prefix associated with the given URI. 1838 * 1839 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String) 1840 */ 1841 public void namespaceAfterStartElement(String prefix, String uri) 1842 throws SAXException 1843 { 1844 // hack for XSLTC with finding URI for default namespace 1845 if (m_elemContext.m_elementURI == null) 1846 { 1847 String prefix1 = getPrefixPart(m_elemContext.m_elementName); 1848 if (prefix1 == null && EMPTYSTRING.equals(prefix)) 1849 { 1850 // the elements URI is not known yet, and it 1851 // doesn't have a prefix, and we are currently 1852 // setting the uri for prefix "", so we have 1853 // the uri for the element... lets remember it 1854 m_elemContext.m_elementURI = uri; 1855 } 1856 } 1857 startPrefixMapping(prefix,uri,false); 1858 } 1859 1860 public void startDTD(String name, String publicId, String systemId) 1861 throws SAXException 1862 { 1863 m_inDTD = true; 1864 super.startDTD(name, publicId, systemId); 1865 } 1866 1867 /** 1868 * Report the end of DTD declarations. 1869 * @throws org.xml.sax.SAXException The application may raise an exception. 1870 * @see #startDTD 1871 */ 1872 public void endDTD() throws org.xml.sax.SAXException 1873 { 1874 m_inDTD = false; 1875 /* for ToHTMLStream the DOCTYPE is entirely output in the 1876 * startDocumentInternal() method, so don't do anything here 1877 */ 1878 } 1879 /** 1880 * This method does nothing. 1881 */ 1882 public void attributeDecl( 1883 String eName, 1884 String aName, 1885 String type, 1886 String valueDefault, 1887 String value) 1888 throws SAXException 1889 { 1890 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1891 } 1892 1893 /** 1894 * This method does nothing. 
*/
    public void elementDecl(String name, String model) throws SAXException
    {
        // Intentionally empty: the internal DTD subset is not serialized
        // by the ToHTMLStream serializer.
    }
    /**
     * This method does nothing.
     */
    public void internalEntityDecl(String name, String value)
        throws SAXException
    {
        // Intentionally empty: the internal DTD subset is not serialized
        // by the ToHTMLStream serializer.
    }
    /**
     * This method does nothing.
     */
    public void externalEntityDecl(
        String name,
        String publicId,
        String systemId)
        throws SAXException
    {
        // Intentionally empty: the internal DTD subset is not serialized
        // by the ToHTMLStream serializer.
    }

    /**
     * This method is used to add an attribute to the currently open element.
     * The caller has guaranteed that this attribute is unique, which means that it
     * has not been seen before and will not be seen again.
     *
     * @param name the qualified name of the attribute
     * @param value the value of the attribute which can contain only
     * ASCII printable characters in the range 32 to 127 inclusive.
     * @param flags the bit values of this integer give optimization information.
1929 */ 1930 public void addUniqueAttribute(String name, String value, int flags) 1931 throws SAXException 1932 { 1933 try 1934 { 1935 final java.io.Writer writer = m_writer; 1936 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt) 1937 { 1938 // "flags" has indicated that the characters 1939 // '>' '<' '&' and '"' are not in the value and 1940 // m_htmlcharInfo has recorded that there are no other 1941 // entities in the range 0 to 127 so we write out the 1942 // value directly 1943 writer.write(' '); 1944 writer.write(name); 1945 writer.write("=\""); 1946 writer.write(value); 1947 writer.write('"'); 1948 } 1949 else if ( 1950 (flags & HTML_ATTREMPTY) > 0 1951 && (value.length() == 0 || value.equalsIgnoreCase(name))) 1952 { 1953 writer.write(' '); 1954 writer.write(name); 1955 } 1956 else 1957 { 1958 writer.write(' '); 1959 writer.write(name); 1960 writer.write("=\""); 1961 if ((flags & HTML_ATTRURL) > 0) 1962 { 1963 writeAttrURI(writer, value, m_specialEscapeURLs); 1964 } 1965 else 1966 { 1967 writeAttrString(writer, value, this.getEncoding()); 1968 } 1969 writer.write('"'); 1970 } 1971 } catch (IOException e) { 1972 throw new SAXException(e); 1973 } 1974 } 1975 1976 public void comment(char ch[], int start, int length) 1977 throws SAXException 1978 { 1979 // The internal DTD subset is not serialized by the ToHTMLStream serializer 1980 if (m_inDTD) 1981 return; 1982 1983 // Clean up some pending issues, just in case 1984 // this call is coming right after a startElement() 1985 // or we are in the middle of writing out CDATA 1986 // or if a startDocument() call was not received 1987 if (m_elemContext.m_startTagOpen) 1988 { 1989 closeStartTag(); 1990 m_elemContext.m_startTagOpen = false; 1991 } 1992 else if (m_cdataTagOpen) 1993 { 1994 closeCDATA(); 1995 } 1996 else if (m_needToCallStartDocument) 1997 { 1998 startDocumentInternal(); 1999 } 2000 2001 /* 2002 * Perhaps comments can be written out in HTML before the DOCTYPE. 
2003 * In this case we might delete this call to writeOutDOCTYPE, and 2004 * it would be handled within the startElement() call. 2005 */ 2006 if (m_needToOutputDocTypeDecl) 2007 outputDocTypeDecl("html"); // best guess for the upcoming element 2008 2009 super.comment(ch, start, length); 2010 } 2011 2012 public boolean reset() 2013 { 2014 boolean ret = super.reset(); 2015 if (!ret) 2016 return false; 2017 resetToHTMLStream(); 2018 return true; 2019 } 2020 2021 private void resetToHTMLStream() 2022 { 2023 // m_htmlcharInfo remains unchanged 2024 // m_htmlInfo = null; // Don't reset 2025 m_inBlockElem = false; 2026 m_inDTD = false; 2027 m_omitMetaTag = false; 2028 m_specialEscapeURLs = true; 2029 } 2030 2031 static class Trie 2032 { 2033 /** 2034 * A digital search trie for 7-bit ASCII text 2035 * The API is a subset of java.util.Hashtable 2036 * The key must be a 7-bit ASCII string 2037 * The value may be any Java Object 2038 * One can get an object stored in a trie from its key, 2039 * but the search is either case sensitive or case 2040 * insensitive to the characters in the key, and this 2041 * choice of sensitivity or insensitivity is made when 2042 * the Trie is created, before any objects are put in it. 2043 * 2044 * This class is a copy of the one in org.apache.xml.utils. 2045 * It exists to cut the serializers dependancy on that package. 2046 * 2047 * @xsl.usage internal 2048 */ 2049 2050 /** Size of the m_nextChar array. */ 2051 public static final int ALPHA_SIZE = 128; 2052 2053 /** The root node of the tree. */ 2054 final Node m_Root; 2055 2056 /** helper buffer to convert Strings to char arrays */ 2057 private char[] m_charBuffer = new char[0]; 2058 2059 /** true if the search for an object is lower case only with the key */ 2060 private final boolean m_lowerCaseOnly; 2061 2062 /** 2063 * Construct the trie that has a case insensitive search. 
2064 */ 2065 public Trie() 2066 { 2067 m_Root = new Node(); 2068 m_lowerCaseOnly = false; 2069 } 2070 2071 /** 2072 * Construct the trie given the desired case sensitivity with the key. 2073 * @param lowerCaseOnly true if the search keys are to be loser case only, 2074 * not case insensitive. 2075 */ 2076 public Trie(boolean lowerCaseOnly) 2077 { 2078 m_Root = new Node(); 2079 m_lowerCaseOnly = lowerCaseOnly; 2080 } 2081 2082 /** 2083 * Put an object into the trie for lookup. 2084 * 2085 * @param key must be a 7-bit ASCII string 2086 * @param value any java object. 2087 * 2088 * @return The old object that matched key, or null. 2089 */ 2090 public Object put(String key, Object value) 2091 { 2092 2093 final int len = key.length(); 2094 if (len > m_charBuffer.length) 2095 { 2096 // make the biggest buffer ever needed in get(String) 2097 m_charBuffer = new char[len]; 2098 } 2099 2100 Node node = m_Root; 2101 2102 for (int i = 0; i < len; i++) 2103 { 2104 Node nextNode = 2105 node.m_nextChar[Character.toLowerCase(key.charAt(i))]; 2106 2107 if (nextNode != null) 2108 { 2109 node = nextNode; 2110 } 2111 else 2112 { 2113 for (; i < len; i++) 2114 { 2115 Node newNode = new Node(); 2116 if (m_lowerCaseOnly) 2117 { 2118 // put this value into the tree only with a lower case key 2119 node.m_nextChar[Character.toLowerCase( 2120 key.charAt(i))] = 2121 newNode; 2122 } 2123 else 2124 { 2125 // put this value into the tree with a case insensitive key 2126 node.m_nextChar[Character.toUpperCase( 2127 key.charAt(i))] = 2128 newNode; 2129 node.m_nextChar[Character.toLowerCase( 2130 key.charAt(i))] = 2131 newNode; 2132 } 2133 node = newNode; 2134 } 2135 break; 2136 } 2137 } 2138 2139 Object ret = node.m_Value; 2140 2141 node.m_Value = value; 2142 2143 return ret; 2144 } 2145 2146 /** 2147 * Get an object that matches the key. 2148 * 2149 * @param key must be a 7-bit ASCII string 2150 * 2151 * @return The object that matches the key, or null. 
2152 */ 2153 public Object get(final String key) 2154 { 2155 2156 final int len = key.length(); 2157 2158 /* If the name is too long, we won't find it, this also keeps us 2159 * from overflowing m_charBuffer 2160 */ 2161 if (m_charBuffer.length < len) 2162 return null; 2163 2164 Node node = m_Root; 2165 switch (len) // optimize the look up based on the number of chars 2166 { 2167 // case 0 looks silly, but the generated bytecode runs 2168 // faster for lookup of elements of length 2 with this in 2169 // and a fair bit faster. Don't know why. 2170 case 0 : 2171 { 2172 return null; 2173 } 2174 2175 case 1 : 2176 { 2177 final char ch = key.charAt(0); 2178 if (ch < ALPHA_SIZE) 2179 { 2180 node = node.m_nextChar[ch]; 2181 if (node != null) 2182 return node.m_Value; 2183 } 2184 return null; 2185 } 2186 // comment out case 2 because the default is faster 2187 // case 2 : 2188 // { 2189 // final char ch0 = key.charAt(0); 2190 // final char ch1 = key.charAt(1); 2191 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE) 2192 // { 2193 // node = node.m_nextChar[ch0]; 2194 // if (node != null) 2195 // { 2196 // 2197 // if (ch1 < ALPHA_SIZE) 2198 // { 2199 // node = node.m_nextChar[ch1]; 2200 // if (node != null) 2201 // return node.m_Value; 2202 // } 2203 // } 2204 // } 2205 // return null; 2206 // } 2207 default : 2208 { 2209 for (int i = 0; i < len; i++) 2210 { 2211 // A thread-safe way to loop over the characters 2212 final char ch = key.charAt(i); 2213 if (ALPHA_SIZE <= ch) 2214 { 2215 // the key is not 7-bit ASCII so we won't find it here 2216 return null; 2217 } 2218 2219 node = node.m_nextChar[ch]; 2220 if (node == null) 2221 return null; 2222 } 2223 2224 return node.m_Value; 2225 } 2226 } 2227 } 2228 2229 /** 2230 * The node representation for the trie. 2231 * @xsl.usage internal 2232 */ 2233 private class Node 2234 { 2235 2236 /** 2237 * Constructor, creates a Node[ALPHA_SIZE]. 
2238 */ 2239 Node() 2240 { 2241 m_nextChar = new Node[ALPHA_SIZE]; 2242 m_Value = null; 2243 } 2244 2245 /** The next nodes. */ 2246 final Node m_nextChar[]; 2247 2248 /** The value. */ 2249 Object m_Value; 2250 } 2251 /** 2252 * Construct the trie from another Trie. 2253 * Both the existing Trie and this new one share the same table for 2254 * lookup, and it is assumed that the table is fully populated and 2255 * not changing anymore. 2256 * 2257 * @param existingTrie the Trie that this one is a copy of. 2258 */ 2259 public Trie(Trie existingTrie) 2260 { 2261 // copy some fields from the existing Trie into this one. 2262 m_Root = existingTrie.m_Root; 2263 m_lowerCaseOnly = existingTrie.m_lowerCaseOnly; 2264 2265 // get a buffer just big enough to hold the longest key in the table. 2266 int max = existingTrie.getLongestKeyLength(); 2267 m_charBuffer = new char[max]; 2268 } 2269 2270 /** 2271 * Get an object that matches the key. 2272 * This method is faster than get(), but is not thread-safe. 2273 * 2274 * @param key must be a 7-bit ASCII string 2275 * 2276 * @return The object that matches the key, or null. 2277 */ 2278 public Object get2(final String key) 2279 { 2280 2281 final int len = key.length(); 2282 2283 /* If the name is too long, we won't find it, this also keeps us 2284 * from overflowing m_charBuffer 2285 */ 2286 if (m_charBuffer.length < len) 2287 return null; 2288 2289 Node node = m_Root; 2290 switch (len) // optimize the look up based on the number of chars 2291 { 2292 // case 0 looks silly, but the generated bytecode runs 2293 // faster for lookup of elements of length 2 with this in 2294 // and a fair bit faster. Don't know why. 2295 case 0 : 2296 { 2297 return null; 2298 } 2299 2300 case 1 : 2301 { 2302 final char ch = key.charAt(0); 2303 if (ch < ALPHA_SIZE) 2304 { 2305 node = node.m_nextChar[ch]; 2306 if (node != null) 2307 return node.m_Value; 2308 } 2309 return null; 2310 } 2311 default : 2312 { 2313 /* Copy string into array. 
This is not thread-safe because 2314 * it modifies the contents of m_charBuffer. If multiple 2315 * threads were to use this Trie they all would be 2316 * using this same array (not good). So this 2317 * method is not thread-safe, but it is faster because 2318 * converting to a char[] and looping over elements of 2319 * the array is faster than a String's charAt(i). 2320 */ 2321 key.getChars(0, len, m_charBuffer, 0); 2322 2323 for (int i = 0; i < len; i++) 2324 { 2325 final char ch = m_charBuffer[i]; 2326 if (ALPHA_SIZE <= ch) 2327 { 2328 // the key is not 7-bit ASCII so we won't find it here 2329 return null; 2330 } 2331 2332 node = node.m_nextChar[ch]; 2333 if (node == null) 2334 return null; 2335 } 2336 2337 return node.m_Value; 2338 } 2339 } 2340 } 2341 2342 /** 2343 * Get the length of the longest key used in the table. 2344 */ 2345 public int getLongestKeyLength() 2346 { 2347 return m_charBuffer.length; 2348 } 2349 } 2350 } 2351