Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 package com.google.protobuf;
     32 
     33 import java.io.IOException;
     34 import java.io.InputStream;
     35 import java.io.OutputStream;
     36 import java.io.UnsupportedEncodingException;
     37 import java.io.ByteArrayInputStream;
     38 import java.nio.ByteBuffer;
     39 import java.util.ArrayList;
     40 import java.util.Arrays;
     41 import java.util.Iterator;
     42 import java.util.List;
     43 import java.util.NoSuchElementException;
     44 import java.util.Stack;
     45 
     46 /**
     47  * Class to represent {@code ByteStrings} formed by concatenation of other
     48  * ByteStrings, without copying the data in the pieces. The concatenation is
     49  * represented as a tree whose leaf nodes are each a {@link LiteralByteString}.
     50  *
     51  * <p>Most of the operation here is inspired by the now-famous paper <a
     52  * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
     53  * BAP95 </a> Ropes: an Alternative to Strings hans-j. boehm, russ atkinson and
     54  * michael plass
     55  *
     56  * <p>The algorithms described in the paper have been implemented for character
     57  * strings in {@link com.google.common.string.Rope} and in the c++ class {@code
     58  * cord.cc}.
     59  *
     60  * <p>Fundamentally the Rope algorithm represents the collection of pieces as a
     61  * binary tree. BAP95 uses a Fibonacci bound relating depth to a minimum
     62  * sequence length, sequences that are too short relative to their depth cause a
     63  * tree rebalance.  More precisely, a tree of depth d is "balanced" in the
     64  * terminology of BAP95 if its length is at least F(d+2), where F(n) is the
     65  * n-the Fibonacci number. Thus for depths 0, 1, 2, 3, 4, 5,... we have minimum
     66  * lengths 1, 2, 3, 5, 8, 13,...
     67  *
     68  * @author carlanton (at) google.com (Carl Haverl)
     69  */
     70 class RopeByteString extends ByteString {
     71 
     72   /**
     73    * BAP95. Let Fn be the nth Fibonacci number. A {@link RopeByteString} of
     74    * depth n is "balanced", i.e flat enough, if its length is at least Fn+2,
     75    * e.g. a "balanced" {@link RopeByteString} of depth 1 must have length at
     76    * least 2, of depth 4 must have length >= 8, etc.
     77    *
     78    * <p>There's nothing special about using the Fibonacci numbers for this, but
     79    * they are a reasonable sequence for encapsulating the idea that we are OK
     80    * with longer strings being encoded in deeper binary trees.
     81    *
     82    * <p>For 32-bit integers, this array has length 46.
     83    */
     84   private static final int[] minLengthByDepth;
     85 
     86   static {
     87     // Dynamically generate the list of Fibonacci numbers the first time this
     88     // class is accessed.
     89     List<Integer> numbers = new ArrayList<Integer>();
     90 
     91     // we skip the first Fibonacci number (1).  So instead of: 1 1 2 3 5 8 ...
     92     // we have: 1 2 3 5 8 ...
     93     int f1 = 1;
     94     int f2 = 1;
     95 
     96     // get all the values until we roll over.
     97     while (f2 > 0) {
     98       numbers.add(f2);
     99       int temp = f1 + f2;
    100       f1 = f2;
    101       f2 = temp;
    102     }
    103 
    104     // we include this here so that we can index this array to [x + 1] in the
    105     // loops below.
    106     numbers.add(Integer.MAX_VALUE);
    107     minLengthByDepth = new int[numbers.size()];
    108     for (int i = 0; i < minLengthByDepth.length; i++) {
    109       // unbox all the values
    110       minLengthByDepth[i] = numbers.get(i);
    111     }
    112   }
    113 
    114   private final int totalLength;
    115   private final ByteString left;
    116   private final ByteString right;
    117   private final int leftLength;
    118   private final int treeDepth;
    119 
    120   /**
    121    * Create a new RopeByteString, which can be thought of as a new tree node, by
    122    * recording references to the two given strings.
    123    *
    124    * @param left  string on the left of this node, should have {@code size() >
    125    *              0}
    126    * @param right string on the right of this node, should have {@code size() >
    127    *              0}
    128    */
    129   private RopeByteString(ByteString left, ByteString right) {
    130     this.left = left;
    131     this.right = right;
    132     leftLength = left.size();
    133     totalLength = leftLength + right.size();
    134     treeDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
    135   }
    136 
    137   /**
    138    * Concatenate the given strings while performing various optimizations to
    139    * slow the growth rate of tree depth and tree node count. The result is
    140    * either a {@link LiteralByteString} or a {@link RopeByteString}
    141    * depending on which optimizations, if any, were applied.
    142    *
    143    * <p>Small pieces of length less than {@link
    144    * ByteString#CONCATENATE_BY_COPY_SIZE} may be copied by value here, as in
    145    * BAP95.  Large pieces are referenced without copy.
    146    *
    147    * @param left  string on the left
    148    * @param right string on the right
    149    * @return concatenation representing the same sequence as the given strings
    150    */
    151   static ByteString concatenate(ByteString left, ByteString right) {
    152     ByteString result;
    153     RopeByteString leftRope =
    154         (left instanceof RopeByteString) ? (RopeByteString) left : null;
    155     if (right.size() == 0) {
    156       result = left;
    157     } else if (left.size() == 0) {
    158       result = right;
    159     } else {
    160       int newLength = left.size() + right.size();
    161       if (newLength < ByteString.CONCATENATE_BY_COPY_SIZE) {
    162         // Optimization from BAP95: For short (leaves in paper, but just short
    163         // here) total length, do a copy of data to a new leaf.
    164         result = concatenateBytes(left, right);
    165       } else if (leftRope != null
    166           && leftRope.right.size() + right.size() < CONCATENATE_BY_COPY_SIZE) {
    167         // Optimization from BAP95: As an optimization of the case where the
    168         // ByteString is constructed by repeated concatenate, recognize the case
    169         // where a short string is concatenated to a left-hand node whose
    170         // right-hand branch is short.  In the paper this applies to leaves, but
    171         // we just look at the length here. This has the advantage of shedding
    172         // references to unneeded data when substrings have been taken.
    173         //
    174         // When we recognize this case, we do a copy of the data and create a
    175         // new parent node so that the depth of the result is the same as the
    176         // given left tree.
    177         ByteString newRight = concatenateBytes(leftRope.right, right);
    178         result = new RopeByteString(leftRope.left, newRight);
    179       } else if (leftRope != null
    180           && leftRope.left.getTreeDepth() > leftRope.right.getTreeDepth()
    181           && leftRope.getTreeDepth() > right.getTreeDepth()) {
    182         // Typically for concatenate-built strings the left-side is deeper than
    183         // the right.  This is our final attempt to concatenate without
    184         // increasing the tree depth.  We'll redo the the node on the RHS.  This
    185         // is yet another optimization for building the string by repeatedly
    186         // concatenating on the right.
    187         ByteString newRight = new RopeByteString(leftRope.right, right);
    188         result = new RopeByteString(leftRope.left, newRight);
    189       } else {
    190         // Fine, we'll add a node and increase the tree depth--unless we
    191         // rebalance ;^)
    192         int newDepth = Math.max(left.getTreeDepth(), right.getTreeDepth()) + 1;
    193         if (newLength >= minLengthByDepth[newDepth]) {
    194           // The tree is shallow enough, so don't rebalance
    195           result = new RopeByteString(left, right);
    196         } else {
    197           result = new Balancer().balance(left, right);
    198         }
    199       }
    200     }
    201     return result;
    202   }
    203 
    204   /**
    205    * Concatenates two strings by copying data values. This is called in a few
    206    * cases in order to reduce the growth of the number of tree nodes.
    207    *
    208    * @param left  string on the left
    209    * @param right string on the right
    210    * @return string formed by copying data bytes
    211    */
    212   private static LiteralByteString concatenateBytes(ByteString left,
    213       ByteString right) {
    214     int leftSize = left.size();
    215     int rightSize = right.size();
    216     byte[] bytes = new byte[leftSize + rightSize];
    217     left.copyTo(bytes, 0, 0, leftSize);
    218     right.copyTo(bytes, 0, leftSize, rightSize);
    219     return new LiteralByteString(bytes);  // Constructor wraps bytes
    220   }
    221 
    222   /**
    223    * Create a new RopeByteString for testing only while bypassing all the
    224    * defenses of {@link #concatenate(ByteString, ByteString)}. This allows
    225    * testing trees of specific structure. We are also able to insert empty
    226    * leaves, though these are dis-allowed, so that we can make sure the
    227    * implementation can withstand their presence.
    228    *
    229    * @param left  string on the left of this node
    230    * @param right string on the right of this node
    231    * @return an unsafe instance for testing only
    232    */
    233   static RopeByteString newInstanceForTest(ByteString left, ByteString right) {
    234     return new RopeByteString(left, right);
    235   }
    236 
    237   /**
    238    * Gets the byte at the given index.
    239    * Throws {@link ArrayIndexOutOfBoundsException} for backwards-compatibility
    240    * reasons although it would more properly be {@link
    241    * IndexOutOfBoundsException}.
    242    *
    243    * @param index index of byte
    244    * @return the value
    245    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
    246    */
    247   @Override
    248   public byte byteAt(int index) {
    249     if (index < 0) {
    250       throw new ArrayIndexOutOfBoundsException("Index < 0: " + index);
    251     }
    252     if (index > totalLength) {
    253       throw new ArrayIndexOutOfBoundsException(
    254           "Index > length: " + index + ", " + totalLength);
    255     }
    256 
    257     byte result;
    258     // Find the relevant piece by recursive descent
    259     if (index < leftLength) {
    260       result = left.byteAt(index);
    261     } else {
    262       result = right.byteAt(index - leftLength);
    263     }
    264     return result;
    265   }
    266 
    267   @Override
    268   public int size() {
    269     return totalLength;
    270   }
    271 
    272   // =================================================================
    273   // Pieces
    274 
    275   @Override
    276   protected int getTreeDepth() {
    277     return treeDepth;
    278   }
    279 
    280   /**
    281    * Determines if the tree is balanced according to BAP95, which means the tree
    282    * is flat-enough with respect to the bounds. Note that this definition of
    283    * balanced is one where sub-trees of balanced trees are not necessarily
    284    * balanced.
    285    *
    286    * @return true if the tree is balanced
    287    */
    288   @Override
    289   protected boolean isBalanced() {
    290     return totalLength >= minLengthByDepth[treeDepth];
    291   }
    292 
    293   /**
    294    * Takes a substring of this one. This involves recursive descent along the
    295    * left and right edges of the substring, and referencing any wholly contained
    296    * segments in between. Any leaf nodes entirely uninvolved in the substring
    297    * will not be referenced by the substring.
    298    *
    299    * <p>Substrings of {@code length < 2} should result in at most a single
    300    * recursive call chain, terminating at a leaf node. Thus the result will be a
    301    * {@link LiteralByteString}. {@link #RopeByteString(ByteString,
    302    * ByteString)}.
    303    *
    304    * @param beginIndex start at this index
    305    * @param endIndex   the last character is the one before this index
    306    * @return substring leaf node or tree
    307    */
    308   @Override
    309   public ByteString substring(int beginIndex, int endIndex) {
    310     if (beginIndex < 0) {
    311       throw new IndexOutOfBoundsException(
    312           "Beginning index: " + beginIndex + " < 0");
    313     }
    314     if (endIndex > totalLength) {
    315       throw new IndexOutOfBoundsException(
    316           "End index: " + endIndex + " > " + totalLength);
    317     }
    318     int substringLength = endIndex - beginIndex;
    319     if (substringLength < 0) {
    320       throw new IndexOutOfBoundsException(
    321           "Beginning index larger than ending index: " + beginIndex + ", "
    322               + endIndex);
    323     }
    324 
    325     ByteString result;
    326     if (substringLength == 0) {
    327       // Empty substring
    328       result = ByteString.EMPTY;
    329     } else if (substringLength == totalLength) {
    330       // The whole string
    331       result = this;
    332     } else {
    333       // Proper substring
    334       if (endIndex <= leftLength) {
    335         // Substring on the left
    336         result = left.substring(beginIndex, endIndex);
    337       } else if (beginIndex >= leftLength) {
    338         // Substring on the right
    339         result = right
    340             .substring(beginIndex - leftLength, endIndex - leftLength);
    341       } else {
    342         // Split substring
    343         ByteString leftSub = left.substring(beginIndex);
    344         ByteString rightSub = right.substring(0, endIndex - leftLength);
    345         // Intentionally not rebalancing, since in many cases these two
    346         // substrings will already be less deep than the top-level
    347         // RopeByteString we're taking a substring of.
    348         result = new RopeByteString(leftSub, rightSub);
    349       }
    350     }
    351     return result;
    352   }
    353 
    354   // =================================================================
    355   // ByteString -> byte[]
    356 
    357   @Override
    358   protected void copyToInternal(byte[] target, int sourceOffset,
    359       int targetOffset, int numberToCopy) {
    360    if (sourceOffset + numberToCopy <= leftLength) {
    361       left.copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
    362     } else if (sourceOffset >= leftLength) {
    363       right.copyToInternal(target, sourceOffset - leftLength, targetOffset,
    364           numberToCopy);
    365     } else {
    366       int leftLength = this.leftLength - sourceOffset;
    367       left.copyToInternal(target, sourceOffset, targetOffset, leftLength);
    368       right.copyToInternal(target, 0, targetOffset + leftLength,
    369           numberToCopy - leftLength);
    370     }
    371   }
    372 
    373   @Override
    374   public void copyTo(ByteBuffer target) {
    375     left.copyTo(target);
    376     right.copyTo(target);
    377   }
    378 
    379   @Override
    380   public ByteBuffer asReadOnlyByteBuffer() {
    381     ByteBuffer byteBuffer = ByteBuffer.wrap(toByteArray());
    382     return byteBuffer.asReadOnlyBuffer();
    383   }
    384 
    385   @Override
    386   public List<ByteBuffer> asReadOnlyByteBufferList() {
    387     // Walk through the list of LiteralByteString's that make up this
    388     // rope, and add each one as a read-only ByteBuffer.
    389     List<ByteBuffer> result = new ArrayList<ByteBuffer>();
    390     PieceIterator pieces = new PieceIterator(this);
    391     while (pieces.hasNext()) {
    392       LiteralByteString byteString = pieces.next();
    393       result.add(byteString.asReadOnlyByteBuffer());
    394     }
    395     return result;
    396   }
    397 
    398   @Override
    399   public void writeTo(OutputStream outputStream) throws IOException {
    400     left.writeTo(outputStream);
    401     right.writeTo(outputStream);
    402   }
    403 
    404   @Override
    405   public String toString(String charsetName)
    406       throws UnsupportedEncodingException {
    407     return new String(toByteArray(), charsetName);
    408   }
    409 
    410   // =================================================================
    411   // UTF-8 decoding
    412 
    413   @Override
    414   public boolean isValidUtf8() {
    415     int leftPartial = left.partialIsValidUtf8(Utf8.COMPLETE, 0, leftLength);
    416     int state = right.partialIsValidUtf8(leftPartial, 0, right.size());
    417     return state == Utf8.COMPLETE;
    418   }
    419 
    420   @Override
    421   protected int partialIsValidUtf8(int state, int offset, int length) {
    422     int toIndex = offset + length;
    423     if (toIndex <= leftLength) {
    424       return left.partialIsValidUtf8(state, offset, length);
    425     } else if (offset >= leftLength) {
    426       return right.partialIsValidUtf8(state, offset - leftLength, length);
    427     } else {
    428       int leftLength = this.leftLength - offset;
    429       int leftPartial = left.partialIsValidUtf8(state, offset, leftLength);
    430       return right.partialIsValidUtf8(leftPartial, 0, length - leftLength);
    431     }
    432   }
    433 
    434   // =================================================================
    435   // equals() and hashCode()
    436 
    437   @Override
    438   public boolean equals(Object other) {
    439     if (other == this) {
    440       return true;
    441     }
    442     if (!(other instanceof ByteString)) {
    443       return false;
    444     }
    445 
    446     ByteString otherByteString = (ByteString) other;
    447     if (totalLength != otherByteString.size()) {
    448       return false;
    449     }
    450     if (totalLength == 0) {
    451       return true;
    452     }
    453 
    454     // You don't really want to be calling equals on long strings, but since
    455     // we cache the hashCode, we effectively cache inequality. We use the cached
    456     // hashCode if it's already computed.  It's arguable we should compute the
    457     // hashCode here, and if we're going to be testing a bunch of byteStrings,
    458     // it might even make sense.
    459     if (hash != 0) {
    460       int cachedOtherHash = otherByteString.peekCachedHashCode();
    461       if (cachedOtherHash != 0 && hash != cachedOtherHash) {
    462         return false;
    463       }
    464     }
    465 
    466     return equalsFragments(otherByteString);
    467   }
    468 
    469   /**
    470    * Determines if this string is equal to another of the same length by
    471    * iterating over the leaf nodes. On each step of the iteration, the
    472    * overlapping segments of the leaves are compared.
    473    *
    474    * @param other string of the same length as this one
    475    * @return true if the values of this string equals the value of the given
    476    *         one
    477    */
    478   private boolean equalsFragments(ByteString other) {
    479     int thisOffset = 0;
    480     Iterator<LiteralByteString> thisIter = new PieceIterator(this);
    481     LiteralByteString thisString = thisIter.next();
    482 
    483     int thatOffset = 0;
    484     Iterator<LiteralByteString> thatIter = new PieceIterator(other);
    485     LiteralByteString thatString = thatIter.next();
    486 
    487     int pos = 0;
    488     while (true) {
    489       int thisRemaining = thisString.size() - thisOffset;
    490       int thatRemaining = thatString.size() - thatOffset;
    491       int bytesToCompare = Math.min(thisRemaining, thatRemaining);
    492 
    493       // At least one of the offsets will be zero
    494       boolean stillEqual = (thisOffset == 0)
    495           ? thisString.equalsRange(thatString, thatOffset, bytesToCompare)
    496           : thatString.equalsRange(thisString, thisOffset, bytesToCompare);
    497       if (!stillEqual) {
    498         return false;
    499       }
    500 
    501       pos += bytesToCompare;
    502       if (pos >= totalLength) {
    503         if (pos == totalLength) {
    504           return true;
    505         }
    506         throw new IllegalStateException();
    507       }
    508       // We always get to the end of at least one of the pieces
    509       if (bytesToCompare == thisRemaining) { // If reached end of this
    510         thisOffset = 0;
    511         thisString = thisIter.next();
    512       } else {
    513         thisOffset += bytesToCompare;
    514       }
    515       if (bytesToCompare == thatRemaining) { // If reached end of that
    516         thatOffset = 0;
    517         thatString = thatIter.next();
    518       } else {
    519         thatOffset += bytesToCompare;
    520       }
    521     }
    522   }
    523 
    524   /**
    525    * Cached hash value.  Intentionally accessed via a data race, which is safe
    526    * because of the Java Memory Model's "no out-of-thin-air values" guarantees
    527    * for ints.
    528    */
    529   private int hash = 0;
    530 
    531   @Override
    532   public int hashCode() {
    533     int h = hash;
    534 
    535     if (h == 0) {
    536       h = totalLength;
    537       h = partialHash(h, 0, totalLength);
    538       if (h == 0) {
    539         h = 1;
    540       }
    541       hash = h;
    542     }
    543     return h;
    544   }
    545 
    546   @Override
    547   protected int peekCachedHashCode() {
    548     return hash;
    549   }
    550 
    551   @Override
    552   protected int partialHash(int h, int offset, int length) {
    553     int toIndex = offset + length;
    554     if (toIndex <= leftLength) {
    555       return left.partialHash(h, offset, length);
    556     } else if (offset >= leftLength) {
    557       return right.partialHash(h, offset - leftLength, length);
    558     } else {
    559       int leftLength = this.leftLength - offset;
    560       int leftPartial = left.partialHash(h, offset, leftLength);
    561       return right.partialHash(leftPartial, 0, length - leftLength);
    562     }
    563   }
    564 
    565   // =================================================================
    566   // Input stream
    567 
    568   @Override
    569   public CodedInputStream newCodedInput() {
    570     return CodedInputStream.newInstance(new RopeInputStream());
    571   }
    572 
    573   @Override
    574   public InputStream newInput() {
    575     return new RopeInputStream();
    576   }
    577 
    578   /**
    579    * This class implements the balancing algorithm of BAP95. In the paper the
    580    * authors use an array to keep track of pieces, while here we use a stack.
    581    * The tree is balanced by traversing subtrees in left to right order, and the
    582    * stack always contains the part of the string we've traversed so far.
    583    *
    584    * <p>One surprising aspect of the algorithm is the result of balancing is not
    585    * necessarily balanced, though it is nearly balanced.  For details, see
    586    * BAP95.
    587    */
    588   private static class Balancer {
    589     // Stack containing the part of the string, starting from the left, that
    590     // we've already traversed.  The final string should be the equivalent of
    591     // concatenating the strings on the stack from bottom to top.
    592     private final Stack<ByteString> prefixesStack = new Stack<ByteString>();
    593 
    594     private ByteString balance(ByteString left, ByteString right) {
    595       doBalance(left);
    596       doBalance(right);
    597 
    598       // Sweep stack to gather the result
    599       ByteString partialString = prefixesStack.pop();
    600       while (!prefixesStack.isEmpty()) {
    601         ByteString newLeft = prefixesStack.pop();
    602         partialString = new RopeByteString(newLeft, partialString);
    603       }
    604       // We should end up with a RopeByteString since at a minimum we will
    605       // create one from concatenating left and right
    606       return partialString;
    607     }
    608 
    609     private void doBalance(ByteString root) {
    610       // BAP95: Insert balanced subtrees whole. This means the result might not
    611       // be balanced, leading to repeated rebalancings on concatenate. However,
    612       // these rebalancings are shallow due to ignoring balanced subtrees, and
    613       // relatively few calls to insert() result.
    614       if (root.isBalanced()) {
    615         insert(root);
    616       } else if (root instanceof RopeByteString) {
    617         RopeByteString rbs = (RopeByteString) root;
    618         doBalance(rbs.left);
    619         doBalance(rbs.right);
    620       } else {
    621         throw new IllegalArgumentException(
    622             "Has a new type of ByteString been created? Found " +
    623                 root.getClass());
    624       }
    625     }
    626 
    627     /**
    628      * Push a string on the balance stack (BAP95).  BAP95 uses an array and
    629      * calls the elements in the array 'bins'.  We instead use a stack, so the
    630      * 'bins' of lengths are represented by differences between the elements of
    631      * minLengthByDepth.
    632      *
    633      * <p>If the length bin for our string, and all shorter length bins, are
    634      * empty, we just push it on the stack.  Otherwise, we need to start
    635      * concatenating, putting the given string in the "middle" and continuing
    636      * until we land in an empty length bin that matches the length of our
    637      * concatenation.
    638      *
    639      * @param byteString string to place on the balance stack
    640      */
    641     private void insert(ByteString byteString) {
    642       int depthBin = getDepthBinForLength(byteString.size());
    643       int binEnd = minLengthByDepth[depthBin + 1];
    644 
    645       // BAP95: Concatenate all trees occupying bins representing the length of
    646       // our new piece or of shorter pieces, to the extent that is possible.
    647       // The goal is to clear the bin which our piece belongs in, but that may
    648       // not be entirely possible if there aren't enough longer bins occupied.
    649       if (prefixesStack.isEmpty() || prefixesStack.peek().size() >= binEnd) {
    650         prefixesStack.push(byteString);
    651       } else {
    652         int binStart = minLengthByDepth[depthBin];
    653 
    654         // Concatenate the subtrees of shorter length
    655         ByteString newTree = prefixesStack.pop();
    656         while (!prefixesStack.isEmpty()
    657             && prefixesStack.peek().size() < binStart) {
    658           ByteString left = prefixesStack.pop();
    659           newTree = new RopeByteString(left, newTree);
    660         }
    661 
    662         // Concatenate the given string
    663         newTree = new RopeByteString(newTree, byteString);
    664 
    665         // Continue concatenating until we land in an empty bin
    666         while (!prefixesStack.isEmpty()) {
    667           depthBin = getDepthBinForLength(newTree.size());
    668           binEnd = minLengthByDepth[depthBin + 1];
    669           if (prefixesStack.peek().size() < binEnd) {
    670             ByteString left = prefixesStack.pop();
    671             newTree = new RopeByteString(left, newTree);
    672           } else {
    673             break;
    674           }
    675         }
    676         prefixesStack.push(newTree);
    677       }
    678     }
    679 
    680     private int getDepthBinForLength(int length) {
    681       int depth = Arrays.binarySearch(minLengthByDepth, length);
    682       if (depth < 0) {
    683         // It wasn't an exact match, so convert to the index of the containing
    684         // fragment, which is one less even than the insertion point.
    685         int insertionPoint = -(depth + 1);
    686         depth = insertionPoint - 1;
    687       }
    688 
    689       return depth;
    690     }
    691   }
    692 
    693   /**
    694    * This class is a continuable tree traversal, which keeps the state
    695    * information which would exist on the stack in a recursive traversal instead
    696    * on a stack of "Bread Crumbs". The maximum depth of the stack in this
    697    * iterator is the same as the depth of the tree being traversed.
    698    *
    699    * <p>This iterator is used to implement
    700    * {@link RopeByteString#equalsFragments(ByteString)}.
    701    */
    702   private static class PieceIterator implements Iterator<LiteralByteString> {
    703 
    704     private final Stack<RopeByteString> breadCrumbs =
    705         new Stack<RopeByteString>();
    706     private LiteralByteString next;
    707 
    708     private PieceIterator(ByteString root) {
    709       next = getLeafByLeft(root);
    710     }
    711 
    712     private LiteralByteString getLeafByLeft(ByteString root) {
    713       ByteString pos = root;
    714       while (pos instanceof RopeByteString) {
    715         RopeByteString rbs = (RopeByteString) pos;
    716         breadCrumbs.push(rbs);
    717         pos = rbs.left;
    718       }
    719       return (LiteralByteString) pos;
    720     }
    721 
    722     private LiteralByteString getNextNonEmptyLeaf() {
    723       while (true) {
    724         // Almost always, we go through this loop exactly once.  However, if
    725         // we discover an empty string in the rope, we toss it and try again.
    726         if (breadCrumbs.isEmpty()) {
    727           return null;
    728         } else {
    729           LiteralByteString result = getLeafByLeft(breadCrumbs.pop().right);
    730           if (!result.isEmpty()) {
    731             return result;
    732           }
    733         }
    734       }
    735     }
    736 
    737     public boolean hasNext() {
    738       return next != null;
    739     }
    740 
    741     /**
    742      * Returns the next item and advances one {@code LiteralByteString}.
    743      *
    744      * @return next non-empty LiteralByteString or {@code null}
    745      */
    746     public LiteralByteString next() {
    747       if (next == null) {
    748         throw new NoSuchElementException();
    749       }
    750       LiteralByteString result = next;
    751       next = getNextNonEmptyLeaf();
    752       return result;
    753     }
    754 
    755     public void remove() {
    756       throw new UnsupportedOperationException();
    757     }
    758   }
    759 
    760   // =================================================================
    761   // ByteIterator
    762 
    763   @Override
    764   public ByteIterator iterator() {
    765     return new RopeByteIterator();
    766   }
    767 
    768   private class RopeByteIterator implements ByteString.ByteIterator {
    769 
    770     private final PieceIterator pieces;
    771     private ByteIterator bytes;
    772     int bytesRemaining;
    773 
    774     private RopeByteIterator() {
    775       pieces = new PieceIterator(RopeByteString.this);
    776       bytes = pieces.next().iterator();
    777       bytesRemaining = size();
    778     }
    779 
    780     public boolean hasNext() {
    781       return (bytesRemaining > 0);
    782     }
    783 
    784     public Byte next() {
    785       return nextByte(); // Does not instantiate a Byte
    786     }
    787 
    788     public byte nextByte() {
    789       if (!bytes.hasNext()) {
    790         bytes = pieces.next().iterator();
    791       }
    792       --bytesRemaining;
    793       return bytes.nextByte();
    794     }
    795 
    796     public void remove() {
    797       throw new UnsupportedOperationException();
    798     }
    799   }
    800 
    801   /**
    802    * This class is the {@link RopeByteString} equivalent for
    803    * {@link ByteArrayInputStream}.
    804    */
    805   private class RopeInputStream extends InputStream {
    806     // Iterates through the pieces of the rope
    807     private PieceIterator pieceIterator;
    808     // The current piece
    809     private LiteralByteString currentPiece;
    810     // The size of the current piece
    811     private int currentPieceSize;
    812     // The index of the next byte to read in the current piece
    813     private int currentPieceIndex;
    814     // The offset of the start of the current piece in the rope byte string
    815     private int currentPieceOffsetInRope;
    816     // Offset in the buffer at which user called mark();
    817     private int mark;
    818 
    819     public RopeInputStream() {
    820       initialize();
    821     }
    822 
    823     @Override
    824     public int read(byte b[], int offset, int length)  {
    825       if (b == null) {
    826         throw new NullPointerException();
    827       } else if (offset < 0 || length < 0 || length > b.length - offset) {
    828         throw new IndexOutOfBoundsException();
    829       }
    830       return readSkipInternal(b, offset, length);
    831     }
    832 
    833     @Override
    834     public long skip(long length) {
    835       if (length < 0) {
    836         throw new IndexOutOfBoundsException();
    837       } else if (length > Integer.MAX_VALUE) {
    838         length = Integer.MAX_VALUE;
    839       }
    840       return readSkipInternal(null, 0, (int) length);
    841     }
    842 
    843     /**
    844      * Internal implementation of read and skip.  If b != null, then read the
    845      * next {@code length} bytes into the buffer {@code b} at
    846      * offset {@code offset}.  If b == null, then skip the next {@code length)
    847      * bytes.
    848      * <p>
    849      * This method assumes that all error checking has already happened.
    850      * <p>
    851      * Returns the actual number of bytes read or skipped.
    852      */
    853     private int readSkipInternal(byte b[], int offset, int length)  {
    854       int bytesRemaining = length;
    855       while (bytesRemaining > 0) {
    856         advanceIfCurrentPieceFullyRead();
    857         if (currentPiece == null) {
    858           if (bytesRemaining == length) {
    859              // We didn't manage to read anything
    860              return -1;
    861            }
    862           break;
    863         } else {
    864           // Copy the bytes from this piece.
    865           int currentPieceRemaining = currentPieceSize - currentPieceIndex;
    866           int count = Math.min(currentPieceRemaining, bytesRemaining);
    867           if (b != null) {
    868             currentPiece.copyTo(b, currentPieceIndex, offset, count);
    869             offset += count;
    870           }
    871           currentPieceIndex += count;
    872           bytesRemaining -= count;
    873         }
    874       }
    875        // Return the number of bytes read.
    876       return length - bytesRemaining;
    877     }
    878 
    879     @Override
    880     public int read() throws IOException {
    881       advanceIfCurrentPieceFullyRead();
    882       if (currentPiece == null) {
    883         return -1;
    884       } else {
    885         return currentPiece.byteAt(currentPieceIndex++) & 0xFF;
    886       }
    887     }
    888 
    889     @Override
    890     public int available() throws IOException {
    891       int bytesRead = currentPieceOffsetInRope + currentPieceIndex;
    892       return RopeByteString.this.size() - bytesRead;
    893     }
    894 
    895     @Override
    896     public boolean markSupported() {
    897       return true;
    898     }
    899 
    900     @Override
    901     public void mark(int readAheadLimit) {
    902       // Set the mark to our position in the byte string
    903       mark = currentPieceOffsetInRope + currentPieceIndex;
    904     }
    905 
    906     @Override
    907     public synchronized void reset() {
    908       // Just reinitialize and skip the specified number of bytes.
    909       initialize();
    910       readSkipInternal(null, 0, mark);
    911     }
    912 
    913     /** Common initialization code used by both the constructor and reset() */
    914     private void initialize() {
    915       pieceIterator = new PieceIterator(RopeByteString.this);
    916       currentPiece = pieceIterator.next();
    917       currentPieceSize = currentPiece.size();
    918       currentPieceIndex = 0;
    919       currentPieceOffsetInRope = 0;
    920     }
    921 
    922     /**
    923      * Skips to the next piece if we have read all the data in the current
    924      * piece.  Sets currentPiece to null if we have reached the end of the
    925      * input.
    926      */
    927     private void advanceIfCurrentPieceFullyRead() {
    928       if (currentPiece != null && currentPieceIndex == currentPieceSize) {
    929         // Generally, we can only go through this loop at most once, since
    930         // empty strings can't end up in a rope.  But better to test.
    931         currentPieceOffsetInRope += currentPieceSize;
    932         currentPieceIndex = 0;
    933         if (pieceIterator.hasNext()) {
    934           currentPiece = pieceIterator.next();
    935           currentPieceSize = currentPiece.size();
    936         } else {
    937           currentPiece = null;
    938           currentPieceSize = 0;
    939         }
    940       }
    941     }
    942   }
    943 }
    944