Home | History | Annotate | Download | only in utils
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements. See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership. The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the  "License");
      7  * you may not use this file except in compliance with the License.
      8  * You may obtain a copy of the License at
      9  *
     10  *     http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing, software
     13  * distributed under the License is distributed on an "AS IS" BASIS,
     14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15  * See the License for the specific language governing permissions and
     16  * limitations under the License.
     17  */
     18 /*
     19  * $Id: FastStringBuffer.java 469279 2006-10-30 21:18:02Z minchau $
     20  */
     21 package org.apache.xml.utils;
     22 
     23 /**
     24  * Bare-bones, unsafe, fast string buffer. No thread-safety, no
     25  * parameter range checking, exposed fields. Note that in typical
     26  * applications, thread-safety of a StringBuffer is a somewhat
     27  * dubious concept in any case.
     28  * <p>
     29  * Note that Stree and DTM used a single FastStringBuffer as a string pool,
     30  * by recording start and length indices within this single buffer. This
     31  * minimizes heap overhead, but of course requires more work when retrieving
     32  * the data.
     33  * <p>
     34  * FastStringBuffer operates as a "chunked buffer". Doing so
     35  * reduces the need to recopy existing information when an append
     36  * exceeds the space available; we just allocate another chunk and
     37  * flow across to it. (The array of chunks may need to grow,
     38  * admittedly, but that's a much smaller object.) Some excess
     39  * recopying may arise when we extract Strings which cross chunk
     40  * boundaries; larger chunks make that less frequent.
     41  * <p>
     42  * The size values are parameterized, to allow tuning this code. In
     43  * theory, Result Tree Fragments might want to be tuned differently
     44  * from the main document's text.
     45  * <p>
     46  * %REVIEW% An experiment in self-tuning is
     47  * included in the code (using nested FastStringBuffers to achieve
     48  * variation in chunk sizes), but this implementation has proven to
     49  * be problematic when data may be being copied from the FSB into itself.
     50  * We should either re-architect that to make this safe (if possible)
     51  * or remove that code and clean up for performance/maintainability reasons.
     52  * <p>
     53  */
     54 public class FastStringBuffer
     55 {
     56   // If nonzero, forces the inial chunk size.
     57   /**/static final int DEBUG_FORCE_INIT_BITS=0;
     58 
     59   	// %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
     60   	// back into the same FSB (variable set from previous variable, for example)
     61   	// and blocksize changes in mid-copy... there's risk of severe malfunction in
     62   	// the read process, due to how the resizing code re-jiggers storage. Arggh.
     63   	// If we want to retain the variable-size-block feature, we need to reconsider
     64   	// that issue. For now, I have forced us into fixed-size mode.
     65     static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
     66 
     67 	/** Manifest constant: Suppress leading whitespace.
     68 	 * This should be used when normalize-to-SAX is called for the first chunk of a
     69 	 * multi-chunk output, or one following unsuppressed whitespace in a previous
     70 	 * chunk.
     71 	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
     72 	 */
     73 	public static final int SUPPRESS_LEADING_WS=0x01;
     74 
     75 	/** Manifest constant: Suppress trailing whitespace.
     76 	 * This should be used when normalize-to-SAX is called for the last chunk of a
     77 	 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
     78 	 */
     79 	public static final int SUPPRESS_TRAILING_WS=0x02;
     80 
     81 	/** Manifest constant: Suppress both leading and trailing whitespace.
     82 	 * This should be used when normalize-to-SAX is called for a complete string.
     83 	 * (I'm not wild about the name of this one. Ideas welcome.)
     84 	 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
     85 	 */
     86 	public static final int SUPPRESS_BOTH
     87 		= SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
     88 
     89 	/** Manifest constant: Carry trailing whitespace of one chunk as leading
     90 	 * whitespace of the next chunk. Used internally; I don't see any reason
     91 	 * to make it public right now.
     92 	 */
     93 	private static final int CARRY_WS=0x04;
     94 
     95 	/**
     96    * Field m_chunkBits sets our chunking strategy, by saying how many
     97    * bits of index can be used within a single chunk before flowing over
     98    * to the next chunk. For example, if m_chunkbits is set to 15, each
     99    * chunk can contain up to 2^15 (32K) characters
    100    */
    101   int m_chunkBits = 15;
    102 
    103   /**
    104    * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
    105    * the largest permissible chunk size is in this particular FastStringBuffer
    106    * hierarchy.
    107    */
    108   int m_maxChunkBits = 15;
    109 
    110   /**
    111    * Field m_rechunkBits affects our chunk-growth strategy, by saying how
    112    * many chunks should be allocated at one size before we encapsulate them
    113    * into the first chunk of the next size up. For example, if m_rechunkBits
    114    * is set to 3, then after 8 chunks at a given size we will rebundle
    115    * them as the first element of a FastStringBuffer using a chunk size
    116    * 8 times larger (chunkBits shifted left three bits).
    117    */
    118   int m_rebundleBits = 2;
    119 
    120   /**
    121    * Field m_chunkSize establishes the maximum size of one chunk of the array
    122    * as 2**chunkbits characters.
    123    * (Which may also be the minimum size if we aren't tuning for storage)
    124    */
    125   int m_chunkSize;  // =1<<(m_chunkBits-1);
    126 
    127   /**
    128    * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
    129    * worth of low-order '1' bits, useful for shift-and-mask addressing
    130    * within the chunks.
    131    */
    132   int m_chunkMask;  // =m_chunkSize-1;
    133 
    134   /**
    135    * Field m_array holds the string buffer's text contents, using an
    136    * array-of-arrays. Note that this array, and the arrays it contains, may be
    137    * reallocated when necessary in order to allow the buffer to grow;
    138    * references to them should be considered to be invalidated after any
    139    * append. However, the only time these arrays are directly exposed
    140    * is in the sendSAXcharacters call.
    141    */
    142   char[][] m_array;
    143 
    144   /**
    145    * Field m_lastChunk is an index into m_array[], pointing to the last
    146    * chunk of the Chunked Array currently in use. Note that additional
    147    * chunks may actually be allocated, eg if the FastStringBuffer had
    148    * previously been truncated or if someone issued an ensureSpace request.
    149    * <p>
    150    * The insertion point for append operations is addressed by the combination
    151    * of m_lastChunk and m_firstFree.
    152    */
    153   int m_lastChunk = 0;
    154 
    155   /**
    156    * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
    157    * the first character in the Chunked Array which is not part of the
    158    * FastStringBuffer's current content. Since m_array[][] is zero-based,
    159    * the length of that content can be calculated as
    160    * (m_lastChunk<<m_chunkBits) + m_firstFree
    161    */
    162   int m_firstFree = 0;
    163 
    164   /**
    165    * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
    166    * length equals m_chunkSize, and which replaces m_array[0]. This allows
    167    * building a hierarchy of FastStringBuffers, where early appends use
    168    * a smaller chunkSize (for less wasted memory overhead) but later
    169    * ones use a larger chunkSize (for less heap activity overhead).
    170    */
    171   FastStringBuffer m_innerFSB = null;
    172 
    173   /**
    174    * Construct a FastStringBuffer, with allocation policy as per parameters.
    175    * <p>
    176    * For coding convenience, I've expressed both allocation sizes in terms of
    177    * a number of bits. That's needed for the final size of a chunk,
    178    * to permit fast and efficient shift-and-mask addressing. It's less critical
    179    * for the inital size, and may be reconsidered.
    180    * <p>
    181    * An alternative would be to accept integer sizes and round to powers of two;
    182    * that really doesn't seem to buy us much, if anything.
    183    *
    184    * @param initChunkBits Length in characters of the initial allocation
    185    * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
    186    * characters.) Later chunks will use larger allocation units, to trade off
    187    * allocation speed of large document against storage efficiency of small
    188    * ones.
    189    * @param maxChunkBits Number of character-offset bits that should be used for
    190    * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
    191    * characters.
    192    * @param rebundleBits Number of character-offset bits that addressing should
    193    * advance before we attempt to take a step from initChunkBits to maxChunkBits
    194    */
    195   public FastStringBuffer(int initChunkBits, int maxChunkBits,
    196                           int rebundleBits)
    197   {
    198     if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
    199 
    200     // %REVIEW%
    201     // Should this force to larger value, or smaller? Smaller less efficient, but if
    202     // someone requested variable mode it's because they care about storage space.
    203     // On the other hand, given the other changes I'm making, odds are that we should
    204     // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
    205     // anyway; we need a permanant solution.
    206     //
    207     if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
    208     //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
    209 
    210     m_array = new char[16][];
    211 
    212     // Don't bite off more than we're prepared to swallow!
    213     if (initChunkBits > maxChunkBits)
    214       initChunkBits = maxChunkBits;
    215 
    216     m_chunkBits = initChunkBits;
    217     m_maxChunkBits = maxChunkBits;
    218     m_rebundleBits = rebundleBits;
    219     m_chunkSize = 1 << (initChunkBits);
    220     m_chunkMask = m_chunkSize - 1;
    221     m_array[0] = new char[m_chunkSize];
    222   }
    223 
    224   /**
    225    * Construct a FastStringBuffer, using a default rebundleBits value.
    226    *
    227    * NEEDSDOC @param initChunkBits
    228    * NEEDSDOC @param maxChunkBits
    229    */
    230   public FastStringBuffer(int initChunkBits, int maxChunkBits)
    231   {
    232     this(initChunkBits, maxChunkBits, 2);
    233   }
    234 
    235   /**
    236    * Construct a FastStringBuffer, using default maxChunkBits and
    237    * rebundleBits values.
    238    * <p>
    239    * ISSUE: Should this call assert initial size, or fixed size?
    240    * Now configured as initial, with a default for fixed.
    241    *
    242    * NEEDSDOC @param initChunkBits
    243    */
    244   public FastStringBuffer(int initChunkBits)
    245   {
    246     this(initChunkBits, 15, 2);
    247   }
    248 
    249   /**
    250    * Construct a FastStringBuffer, using a default allocation policy.
    251    */
    252   public FastStringBuffer()
    253   {
    254 
    255     // 10 bits is 1K. 15 bits is 32K. Remember that these are character
    256     // counts, so actual memory allocation unit is doubled for UTF-16 chars.
    257     //
    258     // For reference: In the original FastStringBuffer, we simply
    259     // overallocated by blocksize (default 1KB) on each buffer-growth.
    260     this(10, 15, 2);
    261   }
    262 
    263   /**
    264    * Get the length of the list. Synonym for length().
    265    *
    266    * @return the number of characters in the FastStringBuffer's content.
    267    */
    268   public final int size()
    269   {
    270     return (m_lastChunk << m_chunkBits) + m_firstFree;
    271   }
    272 
    273   /**
    274    * Get the length of the list. Synonym for size().
    275    *
    276    * @return the number of characters in the FastStringBuffer's content.
    277    */
    278   public final int length()
    279   {
    280     return (m_lastChunk << m_chunkBits) + m_firstFree;
    281   }
    282 
    283   /**
    284    * Discard the content of the FastStringBuffer, and most of the memory
    285    * that was allocated by it, restoring the initial state. Note that this
    286    * may eventually be different from setLength(0), which see.
    287    */
    288   public final void reset()
    289   {
    290 
    291     m_lastChunk = 0;
    292     m_firstFree = 0;
    293 
    294     // Recover the original chunk size
    295     FastStringBuffer innermost = this;
    296 
    297     while (innermost.m_innerFSB != null)
    298     {
    299       innermost = innermost.m_innerFSB;
    300     }
    301 
    302     m_chunkBits = innermost.m_chunkBits;
    303     m_chunkSize = innermost.m_chunkSize;
    304     m_chunkMask = innermost.m_chunkMask;
    305 
    306     // Discard the hierarchy
    307     m_innerFSB = null;
    308     m_array = new char[16][0];
    309     m_array[0] = new char[m_chunkSize];
    310   }
    311 
    312   /**
    313    * Directly set how much of the FastStringBuffer's storage is to be
    314    * considered part of its content. This is a fast but hazardous
    315    * operation. It is not protected against negative values, or values
    316    * greater than the amount of storage currently available... and even
    317    * if additional storage does exist, its contents are unpredictable.
    318    * The only safe use for our setLength() is to truncate the FastStringBuffer
    319    * to a shorter string.
    320    *
    321    * @param l New length. If l<0 or l>=getLength(), this operation will
    322    * not report an error but future operations will almost certainly fail.
    323    */
    324   public final void setLength(int l)
    325   {
    326     m_lastChunk = l >>> m_chunkBits;
    327 
    328     if (m_lastChunk == 0 && m_innerFSB != null)
    329     {
    330       // Replace this FSB with the appropriate inner FSB, truncated
    331       m_innerFSB.setLength(l, this);
    332     }
    333     else
    334     {
    335       m_firstFree = l & m_chunkMask;
    336 
    337 	  // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
    338 	  // us pointing at the start of a chunk which has not yet been allocated. Rather than
    339 	  // pay the cost of dealing with that in the append loops (more scattered and more
    340 	  // inner-loop), we correct it here by moving to the safe side of that
    341 	  // line -- as we would have left the indexes had we appended up to that point.
    342       if(m_firstFree==0 && m_lastChunk>0)
    343       {
    344       	--m_lastChunk;
    345       	m_firstFree=m_chunkSize;
    346       }
    347     }
    348   }
    349 
    350   /**
    351    * Subroutine for the public setLength() method. Deals with the fact
    352    * that truncation may require restoring one of the innerFSBs
    353    *
    354    * NEEDSDOC @param l
    355    * NEEDSDOC @param rootFSB
    356    */
    357   private final void setLength(int l, FastStringBuffer rootFSB)
    358   {
    359 
    360     m_lastChunk = l >>> m_chunkBits;
    361 
    362     if (m_lastChunk == 0 && m_innerFSB != null)
    363     {
    364       m_innerFSB.setLength(l, rootFSB);
    365     }
    366     else
    367     {
    368 
    369       // Undo encapsulation -- pop the innerFSB data back up to root.
    370       // Inefficient, but attempts to keep the code simple.
    371       rootFSB.m_chunkBits = m_chunkBits;
    372       rootFSB.m_maxChunkBits = m_maxChunkBits;
    373       rootFSB.m_rebundleBits = m_rebundleBits;
    374       rootFSB.m_chunkSize = m_chunkSize;
    375       rootFSB.m_chunkMask = m_chunkMask;
    376       rootFSB.m_array = m_array;
    377       rootFSB.m_innerFSB = m_innerFSB;
    378       rootFSB.m_lastChunk = m_lastChunk;
    379 
    380       // Finally, truncate this sucker.
    381       rootFSB.m_firstFree = l & m_chunkMask;
    382     }
    383   }
    384 
    385   /**
    386    * Note that this operation has been somewhat deoptimized by the shift to a
    387    * chunked array, as there is no factory method to produce a String object
    388    * directly from an array of arrays and hence a double copy is needed.
    389    * By using ensureCapacity we hope to minimize the heap overhead of building
    390    * the intermediate StringBuffer.
    391    * <p>
    392    * (It really is a pity that Java didn't design String as a final subclass
    393    * of MutableString, rather than having StringBuffer be a separate hierarchy.
    394    * We'd avoid a <strong>lot</strong> of double-buffering.)
    395    *
    396    * @return the contents of the FastStringBuffer as a standard Java string.
    397    */
    398   public final String toString()
    399   {
    400 
    401     int length = (m_lastChunk << m_chunkBits) + m_firstFree;
    402 
    403     return getString(new StringBuffer(length), 0, 0, length).toString();
    404   }
    405 
    406   /**
    407    * Append a single character onto the FastStringBuffer, growing the
    408    * storage if necessary.
    409    * <p>
    410    * NOTE THAT after calling append(), previously obtained
    411    * references to m_array[][] may no longer be valid....
    412    * though in fact they should be in this instance.
    413    *
    414    * @param value character to be appended.
    415    */
    416   public final void append(char value)
    417   {
    418 
    419     char[] chunk;
    420 
    421     // We may have preallocated chunks. If so, all but last should
    422     // be at full size.
    423 
    424     if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
    425       chunk = m_array[m_lastChunk];
    426     else
    427     {
    428 
    429       // Extend array?
    430       int i = m_array.length;
    431 
    432       if (m_lastChunk + 1 == i)
    433       {
    434         char[][] newarray = new char[i + 16][];
    435 
    436         System.arraycopy(m_array, 0, newarray, 0, i);
    437 
    438         m_array = newarray;
    439       }
    440 
    441       // Advance one chunk
    442       chunk = m_array[++m_lastChunk];
    443 
    444       if (chunk == null)
    445       {
    446 
    447         // Hierarchical encapsulation
    448         if (m_lastChunk == 1 << m_rebundleBits
    449                 && m_chunkBits < m_maxChunkBits)
    450         {
    451 
    452           // Should do all the work of both encapsulating
    453           // existing data and establishing new sizes/offsets
    454           m_innerFSB = new FastStringBuffer(this);
    455         }
    456 
    457         // Add a chunk.
    458         chunk = m_array[m_lastChunk] = new char[m_chunkSize];
    459       }
    460 
    461       m_firstFree = 0;
    462     }
    463 
    464     // Space exists in the chunk. Append the character.
    465     chunk[m_firstFree++] = value;
    466   }
    467 
    468   /**
    469    * Append the contents of a String onto the FastStringBuffer,
    470    * growing the storage if necessary.
    471    * <p>
    472    * NOTE THAT after calling append(), previously obtained
    473    * references to m_array[] may no longer be valid.
    474    *
    475    * @param value String whose contents are to be appended.
    476    */
    477   public final void append(String value)
    478   {
    479 
    480     if (value == null)
    481       return;
    482     int strlen = value.length();
    483 
    484     if (0 == strlen)
    485       return;
    486 
    487     int copyfrom = 0;
    488     char[] chunk = m_array[m_lastChunk];
    489     int available = m_chunkSize - m_firstFree;
    490 
    491     // Repeat while data remains to be copied
    492     while (strlen > 0)
    493     {
    494 
    495       // Copy what fits
    496       if (available > strlen)
    497         available = strlen;
    498 
    499       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
    500                      m_firstFree);
    501 
    502       strlen -= available;
    503       copyfrom += available;
    504 
    505       // If there's more left, allocate another chunk and continue
    506       if (strlen > 0)
    507       {
    508 
    509         // Extend array?
    510         int i = m_array.length;
    511 
    512         if (m_lastChunk + 1 == i)
    513         {
    514           char[][] newarray = new char[i + 16][];
    515 
    516           System.arraycopy(m_array, 0, newarray, 0, i);
    517 
    518           m_array = newarray;
    519         }
    520 
    521         // Advance one chunk
    522         chunk = m_array[++m_lastChunk];
    523 
    524         if (chunk == null)
    525         {
    526 
    527           // Hierarchical encapsulation
    528           if (m_lastChunk == 1 << m_rebundleBits
    529                   && m_chunkBits < m_maxChunkBits)
    530           {
    531 
    532             // Should do all the work of both encapsulating
    533             // existing data and establishing new sizes/offsets
    534             m_innerFSB = new FastStringBuffer(this);
    535           }
    536 
    537           // Add a chunk.
    538           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
    539         }
    540 
    541         available = m_chunkSize;
    542         m_firstFree = 0;
    543       }
    544     }
    545 
    546     // Adjust the insert point in the last chunk, when we've reached it.
    547     m_firstFree += available;
    548   }
    549 
    550   /**
    551    * Append the contents of a StringBuffer onto the FastStringBuffer,
    552    * growing the storage if necessary.
    553    * <p>
    554    * NOTE THAT after calling append(), previously obtained
    555    * references to m_array[] may no longer be valid.
    556    *
    557    * @param value StringBuffer whose contents are to be appended.
    558    */
    559   public final void append(StringBuffer value)
    560   {
    561 
    562     if (value == null)
    563       return;
    564     int strlen = value.length();
    565 
    566     if (0 == strlen)
    567       return;
    568 
    569     int copyfrom = 0;
    570     char[] chunk = m_array[m_lastChunk];
    571     int available = m_chunkSize - m_firstFree;
    572 
    573     // Repeat while data remains to be copied
    574     while (strlen > 0)
    575     {
    576 
    577       // Copy what fits
    578       if (available > strlen)
    579         available = strlen;
    580 
    581       value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
    582                      m_firstFree);
    583 
    584       strlen -= available;
    585       copyfrom += available;
    586 
    587       // If there's more left, allocate another chunk and continue
    588       if (strlen > 0)
    589       {
    590 
    591         // Extend array?
    592         int i = m_array.length;
    593 
    594         if (m_lastChunk + 1 == i)
    595         {
    596           char[][] newarray = new char[i + 16][];
    597 
    598           System.arraycopy(m_array, 0, newarray, 0, i);
    599 
    600           m_array = newarray;
    601         }
    602 
    603         // Advance one chunk
    604         chunk = m_array[++m_lastChunk];
    605 
    606         if (chunk == null)
    607         {
    608 
    609           // Hierarchical encapsulation
    610           if (m_lastChunk == 1 << m_rebundleBits
    611                   && m_chunkBits < m_maxChunkBits)
    612           {
    613 
    614             // Should do all the work of both encapsulating
    615             // existing data and establishing new sizes/offsets
    616             m_innerFSB = new FastStringBuffer(this);
    617           }
    618 
    619           // Add a chunk.
    620           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
    621         }
    622 
    623         available = m_chunkSize;
    624         m_firstFree = 0;
    625       }
    626     }
    627 
    628     // Adjust the insert point in the last chunk, when we've reached it.
    629     m_firstFree += available;
    630   }
    631 
    632   /**
    633    * Append part of the contents of a Character Array onto the
    634    * FastStringBuffer,  growing the storage if necessary.
    635    * <p>
    636    * NOTE THAT after calling append(), previously obtained
    637    * references to m_array[] may no longer be valid.
    638    *
    639    * @param chars character array from which data is to be copied
    640    * @param start offset in chars of first character to be copied,
    641    * zero-based.
    642    * @param length number of characters to be copied
    643    */
    644   public final void append(char[] chars, int start, int length)
    645   {
    646 
    647     int strlen = length;
    648 
    649     if (0 == strlen)
    650       return;
    651 
    652     int copyfrom = start;
    653     char[] chunk = m_array[m_lastChunk];
    654     int available = m_chunkSize - m_firstFree;
    655 
    656     // Repeat while data remains to be copied
    657     while (strlen > 0)
    658     {
    659 
    660       // Copy what fits
    661       if (available > strlen)
    662         available = strlen;
    663 
    664       System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
    665                        available);
    666 
    667       strlen -= available;
    668       copyfrom += available;
    669 
    670       // If there's more left, allocate another chunk and continue
    671       if (strlen > 0)
    672       {
    673 
    674         // Extend array?
    675         int i = m_array.length;
    676 
    677         if (m_lastChunk + 1 == i)
    678         {
    679           char[][] newarray = new char[i + 16][];
    680 
    681           System.arraycopy(m_array, 0, newarray, 0, i);
    682 
    683           m_array = newarray;
    684         }
    685 
    686         // Advance one chunk
    687         chunk = m_array[++m_lastChunk];
    688 
    689         if (chunk == null)
    690         {
    691 
    692           // Hierarchical encapsulation
    693           if (m_lastChunk == 1 << m_rebundleBits
    694                   && m_chunkBits < m_maxChunkBits)
    695           {
    696 
    697             // Should do all the work of both encapsulating
    698             // existing data and establishing new sizes/offsets
    699             m_innerFSB = new FastStringBuffer(this);
    700           }
    701 
    702           // Add a chunk.
    703           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
    704         }
    705 
    706         available = m_chunkSize;
    707         m_firstFree = 0;
    708       }
    709     }
    710 
    711     // Adjust the insert point in the last chunk, when we've reached it.
    712     m_firstFree += available;
    713   }
    714 
    715   /**
    716    * Append the contents of another FastStringBuffer onto
    717    * this FastStringBuffer, growing the storage if necessary.
    718    * <p>
    719    * NOTE THAT after calling append(), previously obtained
    720    * references to m_array[] may no longer be valid.
    721    *
    722    * @param value FastStringBuffer whose contents are
    723    * to be appended.
    724    */
    725   public final void append(FastStringBuffer value)
    726   {
    727 
    728     // Complicating factor here is that the two buffers may use
    729     // different chunk sizes, and even if they're the same we're
    730     // probably on a different alignment due to previously appended
    731     // data. We have to work through the source in bite-sized chunks.
    732     if (value == null)
    733       return;
    734     int strlen = value.length();
    735 
    736     if (0 == strlen)
    737       return;
    738 
    739     int copyfrom = 0;
    740     char[] chunk = m_array[m_lastChunk];
    741     int available = m_chunkSize - m_firstFree;
    742 
    743     // Repeat while data remains to be copied
    744     while (strlen > 0)
    745     {
    746 
    747       // Copy what fits
    748       if (available > strlen)
    749         available = strlen;
    750 
    751       int sourcechunk = (copyfrom + value.m_chunkSize - 1)
    752                         >>> value.m_chunkBits;
    753       int sourcecolumn = copyfrom & value.m_chunkMask;
    754       int runlength = value.m_chunkSize - sourcecolumn;
    755 
    756       if (runlength > available)
    757         runlength = available;
    758 
    759       System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
    760                        m_array[m_lastChunk], m_firstFree, runlength);
    761 
    762       if (runlength != available)
    763         System.arraycopy(value.m_array[sourcechunk + 1], 0,
    764                          m_array[m_lastChunk], m_firstFree + runlength,
    765                          available - runlength);
    766 
    767       strlen -= available;
    768       copyfrom += available;
    769 
    770       // If there's more left, allocate another chunk and continue
    771       if (strlen > 0)
    772       {
    773 
    774         // Extend array?
    775         int i = m_array.length;
    776 
    777         if (m_lastChunk + 1 == i)
    778         {
    779           char[][] newarray = new char[i + 16][];
    780 
    781           System.arraycopy(m_array, 0, newarray, 0, i);
    782 
    783           m_array = newarray;
    784         }
    785 
    786         // Advance one chunk
    787         chunk = m_array[++m_lastChunk];
    788 
    789         if (chunk == null)
    790         {
    791 
    792           // Hierarchical encapsulation
    793           if (m_lastChunk == 1 << m_rebundleBits
    794                   && m_chunkBits < m_maxChunkBits)
    795           {
    796 
    797             // Should do all the work of both encapsulating
    798             // existing data and establishing new sizes/offsets
    799             m_innerFSB = new FastStringBuffer(this);
    800           }
    801 
    802           // Add a chunk.
    803           chunk = m_array[m_lastChunk] = new char[m_chunkSize];
    804         }
    805 
    806         available = m_chunkSize;
    807         m_firstFree = 0;
    808       }
    809     }
    810 
    811     // Adjust the insert point in the last chunk, when we've reached it.
    812     m_firstFree += available;
    813   }
    814 
    815   /**
    816    * @return true if the specified range of characters are all whitespace,
    817    * as defined by XMLCharacterRecognizer.
    818    * <p>
    819    * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
    820    *
    821    * @param start Offset of first character in the range.
    822    * @param length Number of characters to send.
    823    */
    824   public boolean isWhitespace(int start, int length)
    825   {
    826 
    827     int sourcechunk = start >>> m_chunkBits;
    828     int sourcecolumn = start & m_chunkMask;
    829     int available = m_chunkSize - sourcecolumn;
    830     boolean chunkOK;
    831 
    832     while (length > 0)
    833     {
    834       int runlength = (length <= available) ? length : available;
    835 
    836       if (sourcechunk == 0 && m_innerFSB != null)
    837         chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
    838       else
    839         chunkOK = org.apache.xml.utils.XMLCharacterRecognizer.isWhiteSpace(
    840           m_array[sourcechunk], sourcecolumn, runlength);
    841 
    842       if (!chunkOK)
    843         return false;
    844 
    845       length -= runlength;
    846 
    847       ++sourcechunk;
    848 
    849       sourcecolumn = 0;
    850       available = m_chunkSize;
    851     }
    852 
    853     return true;
    854   }
    855 
    856   /**
    857    * @param start Offset of first character in the range.
    858    * @param length Number of characters to send.
    859    * @return a new String object initialized from the specified range of
    860    * characters.
    861    */
    862   public String getString(int start, int length)
    863   {
    864     int startColumn = start & m_chunkMask;
    865     int startChunk = start >>> m_chunkBits;
    866     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
    867       return getOneChunkString(startChunk, startColumn, length);
    868     }
    869     return getString(new StringBuffer(length), startChunk, startColumn,
    870                      length).toString();
    871   }
    872 
    873   protected String getOneChunkString(int startChunk, int startColumn,
    874                                      int length) {
    875     return new String(m_array[startChunk], startColumn, length);
    876   }
    877 
    878   /**
    879    * @param sb StringBuffer to be appended to
    880    * @param start Offset of first character in the range.
    881    * @param length Number of characters to send.
    882    * @return sb with the requested text appended to it
    883    */
    884   StringBuffer getString(StringBuffer sb, int start, int length)
    885   {
    886     return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
    887   }
    888 
    889   /**
    890    * Internal support for toString() and getString().
    891    * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
    892    * and returns a StringBuffer supplied by the caller. This simplifies
    893    * m_innerFSB support.
    894    * <p>
    895    * Note that this operation has been somewhat deoptimized by the shift to a
    896    * chunked array, as there is no factory method to produce a String object
    897    * directly from an array of arrays and hence a double copy is needed.
    898    * By presetting length we hope to minimize the heap overhead of building
    899    * the intermediate StringBuffer.
    900    * <p>
    901    * (It really is a pity that Java didn't design String as a final subclass
    902    * of MutableString, rather than having StringBuffer be a separate hierarchy.
    903    * We'd avoid a <strong>lot</strong> of double-buffering.)
    904    *
    905    *
    906    * @param sb
    907    * @param startChunk
    908    * @param startColumn
    909    * @param length
    910    *
    911    * @return the contents of the FastStringBuffer as a standard Java string.
    912    */
    913   StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
    914                          int length)
    915   {
    916 
    917     int stop = (startChunk << m_chunkBits) + startColumn + length;
    918     int stopChunk = stop >>> m_chunkBits;
    919     int stopColumn = stop & m_chunkMask;
    920 
    921     // Factored out
    922     //StringBuffer sb=new StringBuffer(length);
    923     for (int i = startChunk; i < stopChunk; ++i)
    924     {
    925       if (i == 0 && m_innerFSB != null)
    926         m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
    927       else
    928         sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
    929 
    930       startColumn = 0;  // after first chunk
    931     }
    932 
    933     if (stopChunk == 0 && m_innerFSB != null)
    934       m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
    935     else if (stopColumn > startColumn)
    936       sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
    937 
    938     return sb;
    939   }
    940 
    941   /**
    942    * Get a single character from the string buffer.
    943    *
    944    *
    945    * @param pos character position requested.
    946    * @return A character from the requested position.
    947    */
    948   public char charAt(int pos)
    949   {
    950     int startChunk = pos >>> m_chunkBits;
    951 
    952     if (startChunk == 0 && m_innerFSB != null)
    953       return m_innerFSB.charAt(pos & m_chunkMask);
    954     else
    955       return m_array[startChunk][pos & m_chunkMask];
    956   }
    957 
    958   /**
    959    * Sends the specified range of characters as one or more SAX characters()
    960    * events.
    961    * Note that the buffer reference passed to the ContentHandler may be
    962    * invalidated if the FastStringBuffer is edited; it's the user's
    963    * responsibility to manage access to the FastStringBuffer to prevent this
    964    * problem from arising.
    965    * <p>
    966    * Note too that there is no promise that the output will be sent as a
    967    * single call. As is always true in SAX, one logical string may be split
    968    * across multiple blocks of memory and hence delivered as several
    969    * successive events.
    970    *
    971    * @param ch SAX ContentHandler object to receive the event.
    972    * @param start Offset of first character in the range.
    973    * @param length Number of characters to send.
    974    * @exception org.xml.sax.SAXException may be thrown by handler's
    975    * characters() method.
    976    */
    977   public void sendSAXcharacters(
    978           org.xml.sax.ContentHandler ch, int start, int length)
    979             throws org.xml.sax.SAXException
    980   {
    981 
    982     int startChunk = start >>> m_chunkBits;
    983     int startColumn = start & m_chunkMask;
    984     if (startColumn + length < m_chunkMask && m_innerFSB == null) {
    985         ch.characters(m_array[startChunk], startColumn, length);
    986         return;
    987     }
    988 
    989     int stop = start + length;
    990     int stopChunk = stop >>> m_chunkBits;
    991     int stopColumn = stop & m_chunkMask;
    992 
    993     for (int i = startChunk; i < stopChunk; ++i)
    994     {
    995       if (i == 0 && m_innerFSB != null)
    996         m_innerFSB.sendSAXcharacters(ch, startColumn,
    997                                      m_chunkSize - startColumn);
    998       else
    999         ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
   1000 
   1001       startColumn = 0;  // after first chunk
   1002     }
   1003 
   1004     // Last, or only, chunk
   1005     if (stopChunk == 0 && m_innerFSB != null)
   1006       m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
   1007     else if (stopColumn > startColumn)
   1008     {
   1009       ch.characters(m_array[stopChunk], startColumn,
   1010                     stopColumn - startColumn);
   1011     }
   1012   }
   1013 
   1014   /**
   1015    * Sends the specified range of characters as one or more SAX characters()
   1016    * events, normalizing the characters according to XSLT rules.
   1017    *
   1018    * @param ch SAX ContentHandler object to receive the event.
   1019    * @param start Offset of first character in the range.
   1020    * @param length Number of characters to send.
   1021    * @return normalization status to apply to next chunk (because we may
   1022    * have been called recursively to process an inner FSB):
   1023    * <dl>
   1024    * <dt>0</dt>
   1025    * <dd>if this output did not end in retained whitespace, and thus whitespace
   1026    * at the start of the following chunk (if any) should be converted to a
   1027    * single space.
   1028    * <dt>SUPPRESS_LEADING_WS</dt>
   1029    * <dd>if this output ended in retained whitespace, and thus whitespace
   1030    * at the start of the following chunk (if any) should be completely
   1031    * suppressed.</dd>
   1032    * </dd>
   1033    * </dl>
   1034    * @exception org.xml.sax.SAXException may be thrown by handler's
   1035    * characters() method.
   1036    */
   1037   public int sendNormalizedSAXcharacters(
   1038           org.xml.sax.ContentHandler ch, int start, int length)
   1039             throws org.xml.sax.SAXException
   1040   {
   1041 	// This call always starts at the beginning of the
   1042     // string being written out, either because it was called directly or
   1043     // because it was an m_innerFSB recursion. This is important since
   1044 	// it gives us a well-known initial state for this flag:
   1045 	int stateForNextChunk=SUPPRESS_LEADING_WS;
   1046 
   1047     int stop = start + length;
   1048     int startChunk = start >>> m_chunkBits;
   1049     int startColumn = start & m_chunkMask;
   1050     int stopChunk = stop >>> m_chunkBits;
   1051     int stopColumn = stop & m_chunkMask;
   1052 
   1053     for (int i = startChunk; i < stopChunk; ++i)
   1054     {
   1055       if (i == 0 && m_innerFSB != null)
   1056 				stateForNextChunk=
   1057         m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
   1058                                      m_chunkSize - startColumn);
   1059       else
   1060 				stateForNextChunk=
   1061         sendNormalizedSAXcharacters(m_array[i], startColumn,
   1062                                     m_chunkSize - startColumn,
   1063 																		ch,stateForNextChunk);
   1064 
   1065       startColumn = 0;  // after first chunk
   1066     }
   1067 
   1068     // Last, or only, chunk
   1069     if (stopChunk == 0 && m_innerFSB != null)
   1070 			stateForNextChunk= // %REVIEW% Is this update really needed?
   1071       m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
   1072     else if (stopColumn > startColumn)
   1073     {
   1074 			stateForNextChunk= // %REVIEW% Is this update really needed?
   1075       sendNormalizedSAXcharacters(m_array[stopChunk],
   1076 																	startColumn, stopColumn - startColumn,
   1077 																	ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
   1078     }
   1079 		return stateForNextChunk;
   1080   }
   1081 
   1082   static final char[] SINGLE_SPACE = {' '};
   1083 
   1084   /**
   1085    * Internal method to directly normalize and dispatch the character array.
   1086    * This version is aware of the fact that it may be called several times
   1087    * in succession if the data is made up of multiple "chunks", and thus
   1088    * must actively manage the handling of leading and trailing whitespace.
   1089    *
   1090    * Note: The recursion is due to the possible recursion of inner FSBs.
   1091    *
   1092    * @param ch The characters from the XML document.
   1093    * @param start The start position in the array.
   1094    * @param length The number of characters to read from the array.
   1095    * @param handler SAX ContentHandler object to receive the event.
   1096    * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
   1097    * This is a bitfield contining two flags, bitwise-ORed together:
   1098    * <dl>
   1099    * <dt>SUPPRESS_LEADING_WS</dt>
   1100    * <dd>When false, causes leading whitespace to be converted to a single
   1101    * space; when true, causes it to be discarded entirely.
   1102    * Should be set TRUE for the first chunk, and (in multi-chunk output)
   1103    * whenever the previous chunk ended in retained whitespace.</dd>
   1104    * <dt>SUPPRESS_TRAILING_WS</dt>
   1105    * <dd>When false, causes trailing whitespace to be converted to a single
   1106    * space; when true, causes it to be discarded entirely.
   1107    * Should be set TRUE for the last or only chunk.
   1108    * </dd>
   1109    * </dl>
   1110    * @return normalization status, as in the edgeTreatmentFlags parameter:
   1111    * <dl>
   1112    * <dt>0</dt>
   1113    * <dd>if this output did not end in retained whitespace, and thus whitespace
   1114    * at the start of the following chunk (if any) should be converted to a
   1115    * single space.
   1116    * <dt>SUPPRESS_LEADING_WS</dt>
   1117    * <dd>if this output ended in retained whitespace, and thus whitespace
   1118    * at the start of the following chunk (if any) should be completely
   1119    * suppressed.</dd>
   1120    * </dd>
   1121    * </dl>
   1122    *
   1123    *
   1124    * @exception org.xml.sax.SAXException Any SAX exception, possibly
   1125    *            wrapping another exception.
   1126    */
   1127   static int sendNormalizedSAXcharacters(char ch[],
   1128              int start, int length,
   1129              org.xml.sax.ContentHandler handler,
   1130 						 int edgeTreatmentFlags)
   1131           throws org.xml.sax.SAXException
   1132   {
   1133      boolean processingLeadingWhitespace =
   1134                        ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
   1135      boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
   1136      int currPos = start;
   1137      int limit = start+length;
   1138 
   1139      // Strip any leading spaces first, if required
   1140      if (processingLeadingWhitespace) {
   1141          for (; currPos < limit
   1142                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
   1143               currPos++) { }
   1144 
   1145          // If we've only encountered leading spaces, the
   1146          // current state remains unchanged
   1147          if (currPos == limit) {
   1148              return edgeTreatmentFlags;
   1149          }
   1150      }
   1151 
   1152      // If we get here, there are no more leading spaces to strip
   1153      while (currPos < limit) {
   1154          int startNonWhitespace = currPos;
   1155 
   1156          // Grab a chunk of non-whitespace characters
   1157          for (; currPos < limit
   1158                 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
   1159               currPos++) { }
   1160 
   1161          // Non-whitespace seen - emit them, along with a single
   1162          // space for any preceding whitespace characters
   1163          if (startNonWhitespace != currPos) {
   1164              if (seenWhitespace) {
   1165                  handler.characters(SINGLE_SPACE, 0, 1);
   1166                  seenWhitespace = false;
   1167              }
   1168              handler.characters(ch, startNonWhitespace,
   1169                                 currPos - startNonWhitespace);
   1170          }
   1171 
   1172          int startWhitespace = currPos;
   1173 
   1174          // Consume any whitespace characters
   1175          for (; currPos < limit
   1176                 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
   1177               currPos++) { }
   1178 
   1179          if (startWhitespace != currPos) {
   1180              seenWhitespace = true;
   1181          }
   1182      }
   1183 
   1184      return (seenWhitespace ? CARRY_WS : 0)
   1185             | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
   1186   }
   1187 
   1188   /**
   1189    * Directly normalize and dispatch the character array.
   1190    *
   1191    * @param ch The characters from the XML document.
   1192    * @param start The start position in the array.
   1193    * @param length The number of characters to read from the array.
   1194    * @param handler SAX ContentHandler object to receive the event.
   1195    * @exception org.xml.sax.SAXException Any SAX exception, possibly
   1196    *            wrapping another exception.
   1197    */
   1198   public static void sendNormalizedSAXcharacters(char ch[],
   1199              int start, int length,
   1200              org.xml.sax.ContentHandler handler)
   1201           throws org.xml.sax.SAXException
   1202   {
   1203 		sendNormalizedSAXcharacters(ch, start, length,
   1204              handler, SUPPRESS_BOTH);
   1205 	}
   1206 
   1207 	/**
   1208    * Sends the specified range of characters as sax Comment.
   1209    * <p>
   1210    * Note that, unlike sendSAXcharacters, this has to be done as a single
   1211    * call to LexicalHandler#comment.
   1212    *
   1213    * @param ch SAX LexicalHandler object to receive the event.
   1214    * @param start Offset of first character in the range.
   1215    * @param length Number of characters to send.
   1216    * @exception org.xml.sax.SAXException may be thrown by handler's
   1217    * characters() method.
   1218    */
   1219   public void sendSAXComment(
   1220           org.xml.sax.ext.LexicalHandler ch, int start, int length)
   1221             throws org.xml.sax.SAXException
   1222   {
   1223 
   1224     // %OPT% Do it this way for now...
   1225     String comment = getString(start, length);
   1226     ch.comment(comment.toCharArray(), 0, length);
   1227   }
   1228 
   1229   /**
   1230    * Copies characters from this string into the destination character
   1231    * array.
   1232    *
   1233    * @param      srcBegin   index of the first character in the string
   1234    *                        to copy.
   1235    * @param      srcEnd     index after the last character in the string
   1236    *                        to copy.
   1237    * @param      dst        the destination array.
   1238    * @param      dstBegin   the start offset in the destination array.
   1239    * @exception IndexOutOfBoundsException If any of the following
   1240    *            is true:
   1241    *            <ul><li><code>srcBegin</code> is negative.
   1242    *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
   1243    *            <li><code>srcEnd</code> is greater than the length of this
   1244    *                string
   1245    *            <li><code>dstBegin</code> is negative
   1246    *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
   1247    *                <code>dst.length</code></ul>
   1248    * @exception NullPointerException if <code>dst</code> is <code>null</code>
   1249    */
   1250   private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
   1251   {
   1252     // %TBD% Joe needs to write this function.  Make public when implemented.
   1253   }
   1254 
   1255   /**
   1256    * Encapsulation c'tor. After this is called, the source FastStringBuffer
   1257    * will be reset to use the new object as its m_innerFSB, and will have
   1258    * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
   1259    * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
   1260    *
   1261    * NEEDSDOC @param source
   1262    */
   1263   private FastStringBuffer(FastStringBuffer source)
   1264   {
   1265 
   1266     // Copy existing information into new encapsulation
   1267     m_chunkBits = source.m_chunkBits;
   1268     m_maxChunkBits = source.m_maxChunkBits;
   1269     m_rebundleBits = source.m_rebundleBits;
   1270     m_chunkSize = source.m_chunkSize;
   1271     m_chunkMask = source.m_chunkMask;
   1272     m_array = source.m_array;
   1273     m_innerFSB = source.m_innerFSB;
   1274 
   1275     // These have to be adjusted because we're calling just at the time
   1276     // when we would be about to allocate another chunk
   1277     m_lastChunk = source.m_lastChunk - 1;
   1278     m_firstFree = source.m_chunkSize;
   1279 
   1280     // Establish capsule as the Inner FSB, reset chunk sizes/addressing
   1281     source.m_array = new char[16][];
   1282     source.m_innerFSB = this;
   1283 
   1284     // Since we encapsulated just as we were about to append another
   1285     // chunk, return ready to create the chunk after the innerFSB
   1286     // -- 1, not 0.
   1287     source.m_lastChunk = 1;
   1288     source.m_firstFree = 0;
   1289     source.m_chunkBits += m_rebundleBits;
   1290     source.m_chunkSize = 1 << (source.m_chunkBits);
   1291     source.m_chunkMask = source.m_chunkSize - 1;
   1292   }
   1293 }
   1294