1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 /* 19 * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $ 20 */ 21 package org.apache.xml.utils; 22 23 /** 24 * Bare-bones, unsafe, fast string buffer. No thread-safety, no 25 * parameter range checking, exposed fields. Note that in typical 26 * applications, thread-safety of a StringBuffer is a somewhat 27 * dubious concept in any case. 28 * <p> 29 * Note that Stree and DTM used a single FastStringBuffer as a string pool, 30 * by recording start and length indices within this single buffer. This 31 * minimizes heap overhead, but of course requires more work when retrieving 32 * the data. 33 * <p> 34 * FastStringBuffer operates as a "chunked buffer". Doing so 35 * reduces the need to recopy existing information when an append 36 * exceeds the space available; we just allocate another chunk and 37 * flow across to it. (The array of chunks may need to grow, 38 * admittedly, but that's a much smaller object.) Some excess 39 * recopying may arise when we extract Strings which cross chunk 40 * boundaries; larger chunks make that less frequent. 41 * <p> 42 * The size values are parameterized, to allow tuning this code. In 43 * theory, Result Tree Fragments might want to be tuned differently 44 * from the main document's text. 45 * <p> 46 * %REVIEW% An experiment in self-tuning is 47 * included in the code (using nested FastStringBuffers to achieve 48 * variation in chunk sizes), but this implementation has proven to 49 * be problematic when data may be being copied from the FSB into itself. 50 * We should either re-architect that to make this safe (if possible) 51 * or remove that code and clean up for performance/maintainability reasons. 52 * <p> 53 */ 54 public class FastStringBuffer 55 { 56 // If nonzero, forces the inial chunk size. 57 /**/static final int DEBUG_FORCE_INIT_BITS=0; 58 59 // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied 60 // back into the same FSB (variable set from previous variable, for example) 61 // and blocksize changes in mid-copy... there's risk of severe malfunction in 62 // the read process, due to how the resizing code re-jiggers storage. Arggh. 63 // If we want to retain the variable-size-block feature, we need to reconsider 64 // that issue. For now, I have forced us into fixed-size mode. 65 static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true; 66 67 /** Manifest constant: Suppress leading whitespace. 68 * This should be used when normalize-to-SAX is called for the first chunk of a 69 * multi-chunk output, or one following unsuppressed whitespace in a previous 70 * chunk. 71 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 72 */ 73 public static final int SUPPRESS_LEADING_WS=0x01; 74 75 /** Manifest constant: Suppress trailing whitespace. 76 * This should be used when normalize-to-SAX is called for the last chunk of a 77 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS. 78 */ 79 public static final int SUPPRESS_TRAILING_WS=0x02; 80 81 /** Manifest constant: Suppress both leading and trailing whitespace. 82 * This should be used when normalize-to-SAX is called for a complete string. 83 * (I'm not wild about the name of this one. Ideas welcome.) 84 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 85 */ 86 public static final int SUPPRESS_BOTH 87 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS; 88 89 /** Manifest constant: Carry trailing whitespace of one chunk as leading 90 * whitespace of the next chunk. Used internally; I don't see any reason 91 * to make it public right now. 92 */ 93 private static final int CARRY_WS=0x04; 94 95 /** 96 * Field m_chunkBits sets our chunking strategy, by saying how many 97 * bits of index can be used within a single chunk before flowing over 98 * to the next chunk. For example, if m_chunkbits is set to 15, each 99 * chunk can contain up to 2^15 (32K) characters 100 */ 101 int m_chunkBits = 15; 102 103 /** 104 * Field m_maxChunkBits affects our chunk-growth strategy, by saying what 105 * the largest permissible chunk size is in this particular FastStringBuffer 106 * hierarchy. 107 */ 108 int m_maxChunkBits = 15; 109 110 /** 111 * Field m_rechunkBits affects our chunk-growth strategy, by saying how 112 * many chunks should be allocated at one size before we encapsulate them 113 * into the first chunk of the next size up. For example, if m_rechunkBits 114 * is set to 3, then after 8 chunks at a given size we will rebundle 115 * them as the first element of a FastStringBuffer using a chunk size 116 * 8 times larger (chunkBits shifted left three bits). 117 */ 118 int m_rebundleBits = 2; 119 120 /** 121 * Field m_chunkSize establishes the maximum size of one chunk of the array 122 * as 2**chunkbits characters. 123 * (Which may also be the minimum size if we aren't tuning for storage) 124 */ 125 int m_chunkSize; // =1<<(m_chunkBits-1); 126 127 /** 128 * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits 129 * worth of low-order '1' bits, useful for shift-and-mask addressing 130 * within the chunks. 131 */ 132 int m_chunkMask; // =m_chunkSize-1; 133 134 /** 135 * Field m_array holds the string buffer's text contents, using an 136 * array-of-arrays. Note that this array, and the arrays it contains, may be 137 * reallocated when necessary in order to allow the buffer to grow; 138 * references to them should be considered to be invalidated after any 139 * append. However, the only time these arrays are directly exposed 140 * is in the sendSAXcharacters call. 141 */ 142 char[][] m_array; 143 144 /** 145 * Field m_lastChunk is an index into m_array[], pointing to the last 146 * chunk of the Chunked Array currently in use. Note that additional 147 * chunks may actually be allocated, eg if the FastStringBuffer had 148 * previously been truncated or if someone issued an ensureSpace request. 149 * <p> 150 * The insertion point for append operations is addressed by the combination 151 * of m_lastChunk and m_firstFree. 152 */ 153 int m_lastChunk = 0; 154 155 /** 156 * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to 157 * the first character in the Chunked Array which is not part of the 158 * FastStringBuffer's current content. Since m_array[][] is zero-based, 159 * the length of that content can be calculated as 160 * (m_lastChunk<<m_chunkBits) + m_firstFree 161 */ 162 int m_firstFree = 0; 163 164 /** 165 * Field m_innerFSB, when non-null, is a FastStringBuffer whose total 166 * length equals m_chunkSize, and which replaces m_array[0]. This allows 167 * building a hierarchy of FastStringBuffers, where early appends use 168 * a smaller chunkSize (for less wasted memory overhead) but later 169 * ones use a larger chunkSize (for less heap activity overhead). 170 */ 171 FastStringBuffer m_innerFSB = null; 172 173 /** 174 * Construct a FastStringBuffer, with allocation policy as per parameters. 175 * <p> 176 * For coding convenience, I've expressed both allocation sizes in terms of 177 * a number of bits. That's needed for the final size of a chunk, 178 * to permit fast and efficient shift-and-mask addressing. It's less critical 179 * for the inital size, and may be reconsidered. 180 * <p> 181 * An alternative would be to accept integer sizes and round to powers of two; 182 * that really doesn't seem to buy us much, if anything. 183 * 184 * @param initChunkBits Length in characters of the initial allocation 185 * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024 186 * characters.) Later chunks will use larger allocation units, to trade off 187 * allocation speed of large document against storage efficiency of small 188 * ones. 189 * @param maxChunkBits Number of character-offset bits that should be used for 190 * addressing within a chunk. Maximum length of a chunk is 2^chunkBits 191 * characters. 192 * @param rebundleBits Number of character-offset bits that addressing should 193 * advance before we attempt to take a step from initChunkBits to maxChunkBits 194 */ 195 public FastStringBuffer(int initChunkBits, int maxChunkBits, 196 int rebundleBits) 197 { 198 if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS; 199 200 // %REVIEW% 201 // Should this force to larger value, or smaller? Smaller less efficient, but if 202 // someone requested variable mode it's because they care about storage space. 203 // On the other hand, given the other changes I'm making, odds are that we should 204 // adopt the larger size. Dither, dither, dither... This is just stopgap workaround 205 // anyway; we need a permanant solution. 206 // 207 if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits; 208 //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits; 209 210 m_array = new char[16][]; 211 212 // Don't bite off more than we're prepared to swallow! 213 if (initChunkBits > maxChunkBits) 214 initChunkBits = maxChunkBits; 215 216 m_chunkBits = initChunkBits; 217 m_maxChunkBits = maxChunkBits; 218 m_rebundleBits = rebundleBits; 219 m_chunkSize = 1 << (initChunkBits); 220 m_chunkMask = m_chunkSize - 1; 221 m_array[0] = new char[m_chunkSize]; 222 } 223 224 /** 225 * Construct a FastStringBuffer, using a default rebundleBits value. 226 * 227 * NEEDSDOC @param initChunkBits 228 * NEEDSDOC @param maxChunkBits 229 */ 230 public FastStringBuffer(int initChunkBits, int maxChunkBits) 231 { 232 this(initChunkBits, maxChunkBits, 2); 233 } 234 235 /** 236 * Construct a FastStringBuffer, using default maxChunkBits and 237 * rebundleBits values. 238 * <p> 239 * ISSUE: Should this call assert initial size, or fixed size? 240 * Now configured as initial, with a default for fixed. 241 * 242 * NEEDSDOC @param initChunkBits 243 */ 244 public FastStringBuffer(int initChunkBits) 245 { 246 this(initChunkBits, 15, 2); 247 } 248 249 /** 250 * Construct a FastStringBuffer, using a default allocation policy. 251 */ 252 public FastStringBuffer() 253 { 254 255 // 10 bits is 1K. 15 bits is 32K. Remember that these are character 256 // counts, so actual memory allocation unit is doubled for UTF-16 chars. 257 // 258 // For reference: In the original FastStringBuffer, we simply 259 // overallocated by blocksize (default 1KB) on each buffer-growth. 260 this(10, 15, 2); 261 } 262 263 /** 264 * Get the length of the list. Synonym for length(). 265 * 266 * @return the number of characters in the FastStringBuffer's content. 267 */ 268 public final int size() 269 { 270 return (m_lastChunk << m_chunkBits) + m_firstFree; 271 } 272 273 /** 274 * Get the length of the list. Synonym for size(). 275 * 276 * @return the number of characters in the FastStringBuffer's content. 277 */ 278 public final int length() 279 { 280 return (m_lastChunk << m_chunkBits) + m_firstFree; 281 } 282 283 /** 284 * Discard the content of the FastStringBuffer, and most of the memory 285 * that was allocated by it, restoring the initial state. Note that this 286 * may eventually be different from setLength(0), which see. 287 */ 288 public final void reset() 289 { 290 291 m_lastChunk = 0; 292 m_firstFree = 0; 293 294 // Recover the original chunk size 295 FastStringBuffer innermost = this; 296 297 while (innermost.m_innerFSB != null) 298 { 299 innermost = innermost.m_innerFSB; 300 } 301 302 m_chunkBits = innermost.m_chunkBits; 303 m_chunkSize = innermost.m_chunkSize; 304 m_chunkMask = innermost.m_chunkMask; 305 306 // Discard the hierarchy 307 m_innerFSB = null; 308 m_array = new char[16][0]; 309 m_array[0] = new char[m_chunkSize]; 310 } 311 312 /** 313 * Directly set how much of the FastStringBuffer's storage is to be 314 * considered part of its content. This is a fast but hazardous 315 * operation. It is not protected against negative values, or values 316 * greater than the amount of storage currently available... and even 317 * if additional storage does exist, its contents are unpredictable. 318 * The only safe use for our setLength() is to truncate the FastStringBuffer 319 * to a shorter string. 320 * 321 * @param l New length. If l<0 or l>=getLength(), this operation will 322 * not report an error but future operations will almost certainly fail. 323 */ 324 public final void setLength(int l) 325 { 326 m_lastChunk = l >>> m_chunkBits; 327 328 if (m_lastChunk == 0 && m_innerFSB != null) 329 { 330 // Replace this FSB with the appropriate inner FSB, truncated 331 m_innerFSB.setLength(l, this); 332 } 333 else 334 { 335 m_firstFree = l & m_chunkMask; 336 337 // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving 338 // us pointing at the start of a chunk which has not yet been allocated. Rather than 339 // pay the cost of dealing with that in the append loops (more scattered and more 340 // inner-loop), we correct it here by moving to the safe side of that 341 // line -- as we would have left the indexes had we appended up to that point. 342 if(m_firstFree==0 && m_lastChunk>0) 343 { 344 --m_lastChunk; 345 m_firstFree=m_chunkSize; 346 } 347 } 348 } 349 350 /** 351 * Subroutine for the public setLength() method. Deals with the fact 352 * that truncation may require restoring one of the innerFSBs 353 * 354 * NEEDSDOC @param l 355 * NEEDSDOC @param rootFSB 356 */ 357 private final void setLength(int l, FastStringBuffer rootFSB) 358 { 359 360 m_lastChunk = l >>> m_chunkBits; 361 362 if (m_lastChunk == 0 && m_innerFSB != null) 363 { 364 m_innerFSB.setLength(l, rootFSB); 365 } 366 else 367 { 368 369 // Undo encapsulation -- pop the innerFSB data back up to root. 370 // Inefficient, but attempts to keep the code simple. 371 rootFSB.m_chunkBits = m_chunkBits; 372 rootFSB.m_maxChunkBits = m_maxChunkBits; 373 rootFSB.m_rebundleBits = m_rebundleBits; 374 rootFSB.m_chunkSize = m_chunkSize; 375 rootFSB.m_chunkMask = m_chunkMask; 376 rootFSB.m_array = m_array; 377 rootFSB.m_innerFSB = m_innerFSB; 378 rootFSB.m_lastChunk = m_lastChunk; 379 380 // Finally, truncate this sucker. 381 rootFSB.m_firstFree = l & m_chunkMask; 382 } 383 } 384 385 /** 386 * Note that this operation has been somewhat deoptimized by the shift to a 387 * chunked array, as there is no factory method to produce a String object 388 * directly from an array of arrays and hence a double copy is needed. 389 * By using ensureCapacity we hope to minimize the heap overhead of building 390 * the intermediate StringBuffer. 391 * <p> 392 * (It really is a pity that Java didn't design String as a final subclass 393 * of MutableString, rather than having StringBuffer be a separate hierarchy. 394 * We'd avoid a <strong>lot</strong> of double-buffering.) 395 * 396 * @return the contents of the FastStringBuffer as a standard Java string. 397 */ 398 public final String toString() 399 { 400 401 int length = (m_lastChunk << m_chunkBits) + m_firstFree; 402 403 return getString(new StringBuffer(length), 0, 0, length).toString(); 404 } 405 406 /** 407 * Append a single character onto the FastStringBuffer, growing the 408 * storage if necessary. 409 * <p> 410 * NOTE THAT after calling append(), previously obtained 411 * references to m_array[][] may no longer be valid.... 412 * though in fact they should be in this instance. 413 * 414 * @param value character to be appended. 415 */ 416 public final void append(char value) 417 { 418 419 char[] chunk; 420 421 // We may have preallocated chunks. If so, all but last should 422 // be at full size. 423 424 if (m_firstFree < m_chunkSize) // Simplified test single-character-fits 425 chunk = m_array[m_lastChunk]; 426 else 427 { 428 429 // Extend array? 430 int i = m_array.length; 431 432 if (m_lastChunk + 1 == i) 433 { 434 char[][] newarray = new char[i + 16][]; 435 436 System.arraycopy(m_array, 0, newarray, 0, i); 437 438 m_array = newarray; 439 } 440 441 // Advance one chunk 442 chunk = m_array[++m_lastChunk]; 443 444 if (chunk == null) 445 { 446 447 // Hierarchical encapsulation 448 if (m_lastChunk == 1 << m_rebundleBits 449 && m_chunkBits < m_maxChunkBits) 450 { 451 452 // Should do all the work of both encapsulating 453 // existing data and establishing new sizes/offsets 454 m_innerFSB = new FastStringBuffer(this); 455 } 456 457 // Add a chunk. 458 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 459 } 460 461 m_firstFree = 0; 462 } 463 464 // Space exists in the chunk. Append the character. 465 chunk[m_firstFree++] = value; 466 } 467 468 /** 469 * Append the contents of a String onto the FastStringBuffer, 470 * growing the storage if necessary. 471 * <p> 472 * NOTE THAT after calling append(), previously obtained 473 * references to m_array[] may no longer be valid. 474 * 475 * @param value String whose contents are to be appended. 476 */ 477 public final void append(String value) 478 { 479 480 if (value == null) 481 return; 482 int strlen = value.length(); 483 484 if (0 == strlen) 485 return; 486 487 int copyfrom = 0; 488 char[] chunk = m_array[m_lastChunk]; 489 int available = m_chunkSize - m_firstFree; 490 491 // Repeat while data remains to be copied 492 while (strlen > 0) 493 { 494 495 // Copy what fits 496 if (available > strlen) 497 available = strlen; 498 499 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 500 m_firstFree); 501 502 strlen -= available; 503 copyfrom += available; 504 505 // If there's more left, allocate another chunk and continue 506 if (strlen > 0) 507 { 508 509 // Extend array? 510 int i = m_array.length; 511 512 if (m_lastChunk + 1 == i) 513 { 514 char[][] newarray = new char[i + 16][]; 515 516 System.arraycopy(m_array, 0, newarray, 0, i); 517 518 m_array = newarray; 519 } 520 521 // Advance one chunk 522 chunk = m_array[++m_lastChunk]; 523 524 if (chunk == null) 525 { 526 527 // Hierarchical encapsulation 528 if (m_lastChunk == 1 << m_rebundleBits 529 && m_chunkBits < m_maxChunkBits) 530 { 531 532 // Should do all the work of both encapsulating 533 // existing data and establishing new sizes/offsets 534 m_innerFSB = new FastStringBuffer(this); 535 } 536 537 // Add a chunk. 538 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 539 } 540 541 available = m_chunkSize; 542 m_firstFree = 0; 543 } 544 } 545 546 // Adjust the insert point in the last chunk, when we've reached it. 547 m_firstFree += available; 548 } 549 550 /** 551 * Append the contents of a StringBuffer onto the FastStringBuffer, 552 * growing the storage if necessary. 553 * <p> 554 * NOTE THAT after calling append(), previously obtained 555 * references to m_array[] may no longer be valid. 556 * 557 * @param value StringBuffer whose contents are to be appended. 558 */ 559 public final void append(StringBuffer value) 560 { 561 562 if (value == null) 563 return; 564 int strlen = value.length(); 565 566 if (0 == strlen) 567 return; 568 569 int copyfrom = 0; 570 char[] chunk = m_array[m_lastChunk]; 571 int available = m_chunkSize - m_firstFree; 572 573 // Repeat while data remains to be copied 574 while (strlen > 0) 575 { 576 577 // Copy what fits 578 if (available > strlen) 579 available = strlen; 580 581 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 582 m_firstFree); 583 584 strlen -= available; 585 copyfrom += available; 586 587 // If there's more left, allocate another chunk and continue 588 if (strlen > 0) 589 { 590 591 // Extend array? 592 int i = m_array.length; 593 594 if (m_lastChunk + 1 == i) 595 { 596 char[][] newarray = new char[i + 16][]; 597 598 System.arraycopy(m_array, 0, newarray, 0, i); 599 600 m_array = newarray; 601 } 602 603 // Advance one chunk 604 chunk = m_array[++m_lastChunk]; 605 606 if (chunk == null) 607 { 608 609 // Hierarchical encapsulation 610 if (m_lastChunk == 1 << m_rebundleBits 611 && m_chunkBits < m_maxChunkBits) 612 { 613 614 // Should do all the work of both encapsulating 615 // existing data and establishing new sizes/offsets 616 m_innerFSB = new FastStringBuffer(this); 617 } 618 619 // Add a chunk. 620 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 621 } 622 623 available = m_chunkSize; 624 m_firstFree = 0; 625 } 626 } 627 628 // Adjust the insert point in the last chunk, when we've reached it. 629 m_firstFree += available; 630 } 631 632 /** 633 * Append part of the contents of a Character Array onto the 634 * FastStringBuffer, growing the storage if necessary. 635 * <p> 636 * NOTE THAT after calling append(), previously obtained 637 * references to m_array[] may no longer be valid. 638 * 639 * @param chars character array from which data is to be copied 640 * @param start offset in chars of first character to be copied, 641 * zero-based. 642 * @param length number of characters to be copied 643 */ 644 public final void append(char[] chars, int start, int length) 645 { 646 647 int strlen = length; 648 649 if (0 == strlen) 650 return; 651 652 int copyfrom = start; 653 char[] chunk = m_array[m_lastChunk]; 654 int available = m_chunkSize - m_firstFree; 655 656 // Repeat while data remains to be copied 657 while (strlen > 0) 658 { 659 660 // Copy what fits 661 if (available > strlen) 662 available = strlen; 663 664 System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree, 665 available); 666 667 strlen -= available; 668 copyfrom += available; 669 670 // If there's more left, allocate another chunk and continue 671 if (strlen > 0) 672 { 673 674 // Extend array? 675 int i = m_array.length; 676 677 if (m_lastChunk + 1 == i) 678 { 679 char[][] newarray = new char[i + 16][]; 680 681 System.arraycopy(m_array, 0, newarray, 0, i); 682 683 m_array = newarray; 684 } 685 686 // Advance one chunk 687 chunk = m_array[++m_lastChunk]; 688 689 if (chunk == null) 690 { 691 692 // Hierarchical encapsulation 693 if (m_lastChunk == 1 << m_rebundleBits 694 && m_chunkBits < m_maxChunkBits) 695 { 696 697 // Should do all the work of both encapsulating 698 // existing data and establishing new sizes/offsets 699 m_innerFSB = new FastStringBuffer(this); 700 } 701 702 // Add a chunk. 703 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 704 } 705 706 available = m_chunkSize; 707 m_firstFree = 0; 708 } 709 } 710 711 // Adjust the insert point in the last chunk, when we've reached it. 712 m_firstFree += available; 713 } 714 715 /** 716 * Append the contents of another FastStringBuffer onto 717 * this FastStringBuffer, growing the storage if necessary. 718 * <p> 719 * NOTE THAT after calling append(), previously obtained 720 * references to m_array[] may no longer be valid. 721 * 722 * @param value FastStringBuffer whose contents are 723 * to be appended. 724 */ 725 public final void append(FastStringBuffer value) 726 { 727 728 // Complicating factor here is that the two buffers may use 729 // different chunk sizes, and even if they're the same we're 730 // probably on a different alignment due to previously appended 731 // data. We have to work through the source in bite-sized chunks. 732 if (value == null) 733 return; 734 int strlen = value.length(); 735 736 if (0 == strlen) 737 return; 738 739 int copyfrom = 0; 740 char[] chunk = m_array[m_lastChunk]; 741 int available = m_chunkSize - m_firstFree; 742 743 // Repeat while data remains to be copied 744 while (strlen > 0) 745 { 746 747 // Copy what fits 748 if (available > strlen) 749 available = strlen; 750 751 int sourcechunk = (copyfrom + value.m_chunkSize - 1) 752 >>> value.m_chunkBits; 753 int sourcecolumn = copyfrom & value.m_chunkMask; 754 int runlength = value.m_chunkSize - sourcecolumn; 755 756 if (runlength > available) 757 runlength = available; 758 759 System.arraycopy(value.m_array[sourcechunk], sourcecolumn, 760 m_array[m_lastChunk], m_firstFree, runlength); 761 762 if (runlength != available) 763 System.arraycopy(value.m_array[sourcechunk + 1], 0, 764 m_array[m_lastChunk], m_firstFree + runlength, 765 available - runlength); 766 767 strlen -= available; 768 copyfrom += available; 769 770 // If there's more left, allocate another chunk and continue 771 if (strlen > 0) 772 { 773 774 // Extend array? 775 int i = m_array.length; 776 777 if (m_lastChunk + 1 == i) 778 { 779 char[][] newarray = new char[i + 16][]; 780 781 System.arraycopy(m_array, 0, newarray, 0, i); 782 783 m_array = newarray; 784 } 785 786 // Advance one chunk 787 chunk = m_array[++m_lastChunk]; 788 789 if (chunk == null) 790 { 791 792 // Hierarchical encapsulation 793 if (m_lastChunk == 1 << m_rebundleBits 794 && m_chunkBits < m_maxChunkBits) 795 { 796 797 // Should do all the work of both encapsulating 798 // existing data and establishing new sizes/offsets 799 m_innerFSB = new FastStringBuffer(this); 800 } 801 802 // Add a chunk. 803 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 804 } 805 806 available = m_chunkSize; 807 m_firstFree = 0; 808 } 809 } 810 811 // Adjust the insert point in the last chunk, when we've reached it. 812 m_firstFree += available; 813 } 814 815 /** 816 * @return true if the specified range of characters are all whitespace, 817 * as defined by XMLCharacterRecognizer. 818 * <p> 819 * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE. 820 * 821 * @param start Offset of first character in the range. 822 * @param length Number of characters to send. 823 */ 824 public boolean isWhitespace(int start, int length) 825 { 826 827 int sourcechunk = start >>> m_chunkBits; 828 int sourcecolumn = start & m_chunkMask; 829 int available = m_chunkSize - sourcecolumn; 830 boolean chunkOK; 831 832 while (length > 0) 833 { 834 int runlength = (length <= available) ? length : available; 835 836 if (sourcechunk == 0 && m_innerFSB != null) 837 chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength); 838 else 839 chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace( 840 m_array[sourcechunk], sourcecolumn, runlength); 841 842 if (!chunkOK) 843 return false; 844 845 length -= runlength; 846 847 ++sourcechunk; 848 849 sourcecolumn = 0; 850 available = m_chunkSize; 851 } 852 853 return true; 854 } 855 856 /** 857 * @param start Offset of first character in the range. 858 * @param length Number of characters to send. 859 * @return a new String object initialized from the specified range of 860 * characters. 861 */ 862 public String getString(int start, int length) 863 { 864 int startColumn = start & m_chunkMask; 865 int startChunk = start >>> m_chunkBits; 866 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 867 return getOneChunkString(startChunk, startColumn, length); 868 } 869 return getString(new StringBuffer(length), startChunk, startColumn, 870 length).toString(); 871 } 872 873 protected String getOneChunkString(int startChunk, int startColumn, 874 int length) { 875 return new String(m_array[startChunk], startColumn, length); 876 } 877 878 /** 879 * @param sb StringBuffer to be appended to 880 * @param start Offset of first character in the range. 881 * @param length Number of characters to send. 882 * @return sb with the requested text appended to it 883 */ 884 StringBuffer getString(StringBuffer sb, int start, int length) 885 { 886 return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length); 887 } 888 889 /** 890 * Internal support for toString() and getString(). 891 * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into 892 * and returns a StringBuffer supplied by the caller. This simplifies 893 * m_innerFSB support. 894 * <p> 895 * Note that this operation has been somewhat deoptimized by the shift to a 896 * chunked array, as there is no factory method to produce a String object 897 * directly from an array of arrays and hence a double copy is needed. 898 * By presetting length we hope to minimize the heap overhead of building 899 * the intermediate StringBuffer. 900 * <p> 901 * (It really is a pity that Java didn't design String as a final subclass 902 * of MutableString, rather than having StringBuffer be a separate hierarchy. 903 * We'd avoid a <strong>lot</strong> of double-buffering.) 904 * 905 * 906 * @param sb 907 * @param startChunk 908 * @param startColumn 909 * @param length 910 * 911 * @return the contents of the FastStringBuffer as a standard Java string. 912 */ 913 StringBuffer getString(StringBuffer sb, int startChunk, int startColumn, 914 int length) 915 { 916 917 int stop = (startChunk << m_chunkBits) + startColumn + length; 918 int stopChunk = stop >>> m_chunkBits; 919 int stopColumn = stop & m_chunkMask; 920 921 // Factored out 922 //StringBuffer sb=new StringBuffer(length); 923 for (int i = startChunk; i < stopChunk; ++i) 924 { 925 if (i == 0 && m_innerFSB != null) 926 m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn); 927 else 928 sb.append(m_array[i], startColumn, m_chunkSize - startColumn); 929 930 startColumn = 0; // after first chunk 931 } 932 933 if (stopChunk == 0 && m_innerFSB != null) 934 m_innerFSB.getString(sb, startColumn, stopColumn - startColumn); 935 else if (stopColumn > startColumn) 936 sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn); 937 938 return sb; 939 } 940 941 /** 942 * Get a single character from the string buffer. 943 * 944 * 945 * @param pos character position requested. 946 * @return A character from the requested position. 947 */ 948 public char charAt(int pos) 949 { 950 int startChunk = pos >>> m_chunkBits; 951 952 if (startChunk == 0 && m_innerFSB != null) 953 return m_innerFSB.charAt(pos & m_chunkMask); 954 else 955 return m_array[startChunk][pos & m_chunkMask]; 956 } 957 958 /** 959 * Sends the specified range of characters as one or more SAX characters() 960 * events. 961 * Note that the buffer reference passed to the ContentHandler may be 962 * invalidated if the FastStringBuffer is edited; it's the user's 963 * responsibility to manage access to the FastStringBuffer to prevent this 964 * problem from arising. 965 * <p> 966 * Note too that there is no promise that the output will be sent as a 967 * single call. As is always true in SAX, one logical string may be split 968 * across multiple blocks of memory and hence delivered as several 969 * successive events. 970 * 971 * @param ch SAX ContentHandler object to receive the event. 972 * @param start Offset of first character in the range. 973 * @param length Number of characters to send. 974 * @exception org.xml.sax.SAXException may be thrown by handler's 975 * characters() method. 976 */ 977 public void sendSAXcharacters( 978 org.xml.sax.ContentHandler ch, int start, int length) 979 throws org.xml.sax.SAXException 980 { 981 982 int startChunk = start >>> m_chunkBits; 983 int startColumn = start & m_chunkMask; 984 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 985 ch.characters(m_array[startChunk], startColumn, length); 986 return; 987 } 988 989 int stop = start + length; 990 int stopChunk = stop >>> m_chunkBits; 991 int stopColumn = stop & m_chunkMask; 992 993 for (int i = startChunk; i < stopChunk; ++i) 994 { 995 if (i == 0 && m_innerFSB != null) 996 m_innerFSB.sendSAXcharacters(ch, startColumn, 997 m_chunkSize - startColumn); 998 else 999 ch.characters(m_array[i], startColumn, m_chunkSize - startColumn); 1000 1001 startColumn = 0; // after first chunk 1002 } 1003 1004 // Last, or only, chunk 1005 if (stopChunk == 0 && m_innerFSB != null) 1006 m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn); 1007 else if (stopColumn > startColumn) 1008 { 1009 ch.characters(m_array[stopChunk], startColumn, 1010 stopColumn - startColumn); 1011 } 1012 } 1013 1014 /** 1015 * Sends the specified range of characters as one or more SAX characters() 1016 * events, normalizing the characters according to XSLT rules. 1017 * 1018 * @param ch SAX ContentHandler object to receive the event. 1019 * @param start Offset of first character in the range. 1020 * @param length Number of characters to send. 1021 * @return normalization status to apply to next chunk (because we may 1022 * have been called recursively to process an inner FSB): 1023 * <dl> 1024 * <dt>0</dt> 1025 * <dd>if this output did not end in retained whitespace, and thus whitespace 1026 * at the start of the following chunk (if any) should be converted to a 1027 * single space. 1028 * <dt>SUPPRESS_LEADING_WS</dt> 1029 * <dd>if this output ended in retained whitespace, and thus whitespace 1030 * at the start of the following chunk (if any) should be completely 1031 * suppressed.</dd> 1032 * </dd> 1033 * </dl> 1034 * @exception org.xml.sax.SAXException may be thrown by handler's 1035 * characters() method. 1036 */ 1037 public int sendNormalizedSAXcharacters( 1038 org.xml.sax.ContentHandler ch, int start, int length) 1039 throws org.xml.sax.SAXException 1040 { 1041 // This call always starts at the beginning of the 1042 // string being written out, either because it was called directly or 1043 // because it was an m_innerFSB recursion. This is important since 1044 // it gives us a well-known initial state for this flag: 1045 int stateForNextChunk=SUPPRESS_LEADING_WS; 1046 1047 int stop = start + length; 1048 int startChunk = start >>> m_chunkBits; 1049 int startColumn = start & m_chunkMask; 1050 int stopChunk = stop >>> m_chunkBits; 1051 int stopColumn = stop & m_chunkMask; 1052 1053 for (int i = startChunk; i < stopChunk; ++i) 1054 { 1055 if (i == 0 && m_innerFSB != null) 1056 stateForNextChunk= 1057 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, 1058 m_chunkSize - startColumn); 1059 else 1060 stateForNextChunk= 1061 sendNormalizedSAXcharacters(m_array[i], startColumn, 1062 m_chunkSize - startColumn, 1063 ch,stateForNextChunk); 1064 1065 startColumn = 0; // after first chunk 1066 } 1067 1068 // Last, or only, chunk 1069 if (stopChunk == 0 && m_innerFSB != null) 1070 stateForNextChunk= // %REVIEW% Is this update really needed? 1071 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn); 1072 else if (stopColumn > startColumn) 1073 { 1074 stateForNextChunk= // %REVIEW% Is this update really needed? 1075 sendNormalizedSAXcharacters(m_array[stopChunk], 1076 startColumn, stopColumn - startColumn, 1077 ch, stateForNextChunk | SUPPRESS_TRAILING_WS); 1078 } 1079 return stateForNextChunk; 1080 } 1081 1082 static final char[] SINGLE_SPACE = {' '}; 1083 1084 /** 1085 * Internal method to directly normalize and dispatch the character array. 1086 * This version is aware of the fact that it may be called several times 1087 * in succession if the data is made up of multiple "chunks", and thus 1088 * must actively manage the handling of leading and trailing whitespace. 1089 * 1090 * Note: The recursion is due to the possible recursion of inner FSBs. 1091 * 1092 * @param ch The characters from the XML document. 1093 * @param start The start position in the array. 1094 * @param length The number of characters to read from the array. 1095 * @param handler SAX ContentHandler object to receive the event. 1096 * @param edgeTreatmentFlags How leading/trailing spaces should be handled. 1097 * This is a bitfield contining two flags, bitwise-ORed together: 1098 * <dl> 1099 * <dt>SUPPRESS_LEADING_WS</dt> 1100 * <dd>When false, causes leading whitespace to be converted to a single 1101 * space; when true, causes it to be discarded entirely. 1102 * Should be set TRUE for the first chunk, and (in multi-chunk output) 1103 * whenever the previous chunk ended in retained whitespace.</dd> 1104 * <dt>SUPPRESS_TRAILING_WS</dt> 1105 * <dd>When false, causes trailing whitespace to be converted to a single 1106 * space; when true, causes it to be discarded entirely. 1107 * Should be set TRUE for the last or only chunk. 1108 * </dd> 1109 * </dl> 1110 * @return normalization status, as in the edgeTreatmentFlags parameter: 1111 * <dl> 1112 * <dt>0</dt> 1113 * <dd>if this output did not end in retained whitespace, and thus whitespace 1114 * at the start of the following chunk (if any) should be converted to a 1115 * single space. 1116 * <dt>SUPPRESS_LEADING_WS</dt> 1117 * <dd>if this output ended in retained whitespace, and thus whitespace 1118 * at the start of the following chunk (if any) should be completely 1119 * suppressed.</dd> 1120 * </dd> 1121 * </dl> 1122 * 1123 * 1124 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1125 * wrapping another exception. 1126 */ 1127 static int sendNormalizedSAXcharacters(char ch[], 1128 int start, int length, 1129 org.xml.sax.ContentHandler handler, 1130 int edgeTreatmentFlags) 1131 throws org.xml.sax.SAXException 1132 { 1133 boolean processingLeadingWhitespace = 1134 ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0); 1135 boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0); 1136 int currPos = start; 1137 int limit = start+length; 1138 1139 // Strip any leading spaces first, if required 1140 if (processingLeadingWhitespace) { 1141 for (; currPos < limit 1142 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1143 currPos++) { } 1144 1145 // If we've only encountered leading spaces, the 1146 // current state remains unchanged 1147 if (currPos == limit) { 1148 return edgeTreatmentFlags; 1149 } 1150 } 1151 1152 // If we get here, there are no more leading spaces to strip 1153 while (currPos < limit) { 1154 int startNonWhitespace = currPos; 1155 1156 // Grab a chunk of non-whitespace characters 1157 for (; currPos < limit 1158 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1159 currPos++) { } 1160 1161 // Non-whitespace seen - emit them, along with a single 1162 // space for any preceding whitespace characters 1163 if (startNonWhitespace != currPos) { 1164 if (seenWhitespace) { 1165 handler.characters(SINGLE_SPACE, 0, 1); 1166 seenWhitespace = false; 1167 } 1168 handler.characters(ch, startNonWhitespace, 1169 currPos - startNonWhitespace); 1170 } 1171 1172 int startWhitespace = currPos; 1173 1174 // Consume any whitespace characters 1175 for (; currPos < limit 1176 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1177 currPos++) { } 1178 1179 if (startWhitespace != currPos) { 1180 seenWhitespace = true; 1181 } 1182 } 1183 1184 return (seenWhitespace ? CARRY_WS : 0) 1185 | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS); 1186 } 1187 1188 /** 1189 * Directly normalize and dispatch the character array. 1190 * 1191 * @param ch The characters from the XML document. 1192 * @param start The start position in the array. 1193 * @param length The number of characters to read from the array. 1194 * @param handler SAX ContentHandler object to receive the event. 1195 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1196 * wrapping another exception. 1197 */ 1198 public static void sendNormalizedSAXcharacters(char ch[], 1199 int start, int length, 1200 org.xml.sax.ContentHandler handler) 1201 throws org.xml.sax.SAXException 1202 { 1203 sendNormalizedSAXcharacters(ch, start, length, 1204 handler, SUPPRESS_BOTH); 1205 } 1206 1207 /** 1208 * Sends the specified range of characters as sax Comment. 1209 * <p> 1210 * Note that, unlike sendSAXcharacters, this has to be done as a single 1211 * call to LexicalHandler#comment. 1212 * 1213 * @param ch SAX LexicalHandler object to receive the event. 1214 * @param start Offset of first character in the range. 1215 * @param length Number of characters to send. 1216 * @exception org.xml.sax.SAXException may be thrown by handler's 1217 * characters() method. 1218 */ 1219 public void sendSAXComment( 1220 org.xml.sax.ext.LexicalHandler ch, int start, int length) 1221 throws org.xml.sax.SAXException 1222 { 1223 1224 // %OPT% Do it this way for now... 1225 String comment = getString(start, length); 1226 ch.comment(comment.toCharArray(), 0, length); 1227 } 1228 1229 /** 1230 * Copies characters from this string into the destination character 1231 * array. 1232 * 1233 * @param srcBegin index of the first character in the string 1234 * to copy. 1235 * @param srcEnd index after the last character in the string 1236 * to copy. 1237 * @param dst the destination array. 1238 * @param dstBegin the start offset in the destination array. 1239 * @exception IndexOutOfBoundsException If any of the following 1240 * is true: 1241 * <ul><li><code>srcBegin</code> is negative. 1242 * <li><code>srcBegin</code> is greater than <code>srcEnd</code> 1243 * <li><code>srcEnd</code> is greater than the length of this 1244 * string 1245 * <li><code>dstBegin</code> is negative 1246 * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than 1247 * <code>dst.length</code></ul> 1248 * @exception NullPointerException if <code>dst</code> is <code>null</code> 1249 */ 1250 private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 1251 { 1252 // %TBD% Joe needs to write this function. Make public when implemented. 1253 } 1254 1255 /** 1256 * Encapsulation c'tor. After this is called, the source FastStringBuffer 1257 * will be reset to use the new object as its m_innerFSB, and will have 1258 * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED 1259 * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits) 1260 * 1261 * NEEDSDOC @param source 1262 */ 1263 private FastStringBuffer(FastStringBuffer source) 1264 { 1265 1266 // Copy existing information into new encapsulation 1267 m_chunkBits = source.m_chunkBits; 1268 m_maxChunkBits = source.m_maxChunkBits; 1269 m_rebundleBits = source.m_rebundleBits; 1270 m_chunkSize = source.m_chunkSize; 1271 m_chunkMask = source.m_chunkMask; 1272 m_array = source.m_array; 1273 m_innerFSB = source.m_innerFSB; 1274 1275 // These have to be adjusted because we're calling just at the time 1276 // when we would be about to allocate another chunk 1277 m_lastChunk = source.m_lastChunk - 1; 1278 m_firstFree = source.m_chunkSize; 1279 1280 // Establish capsule as the Inner FSB, reset chunk sizes/addressing 1281 source.m_array = new char[16][]; 1282 source.m_innerFSB = this; 1283 1284 // Since we encapsulated just as we were about to append another 1285 // chunk, return ready to create the chunk after the innerFSB 1286 // -- 1, not 0. 1287 source.m_lastChunk = 1; 1288 source.m_firstFree = 0; 1289 source.m_chunkBits += m_rebundleBits; 1290 source.m_chunkSize = 1 << (source.m_chunkBits); 1291 source.m_chunkMask = source.m_chunkSize - 1; 1292 } 1293 } 1294