Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 package com.google.protobuf;
     32 
     33 import java.io.ByteArrayOutputStream;
     34 import java.io.IOException;
     35 import java.io.InputStream;
     36 import java.io.OutputStream;
     37 import java.io.UnsupportedEncodingException;
     38 import java.nio.ByteBuffer;
     39 import java.util.ArrayList;
     40 import java.util.Collection;
     41 import java.util.Iterator;
     42 import java.util.List;
     43 import java.util.NoSuchElementException;
     44 
     45 /**
     46  * Immutable sequence of bytes.  Substring is supported by sharing the reference
     47  * to the immutable underlying bytes, as with {@link String}.  Concatenation is
     48  * likewise supported without copying (long strings) by building a tree of
     49  * pieces in {@link RopeByteString}.
     50  * <p>
     51  * Like {@link String}, the contents of a {@link ByteString} can never be
     52  * observed to change, not even in the presence of a data race or incorrect
     53  * API usage in the client code.
     54  *
     55  * @author crazybob (at) google.com Bob Lee
     56  * @author kenton (at) google.com Kenton Varda
     57  * @author carlanton (at) google.com Carl Haverl
     58  * @author martinrb (at) google.com Martin Buchholz
     59  */
     60 public abstract class ByteString implements Iterable<Byte> {
     61 
     62   /**
     63    * When two strings to be concatenated have a combined length shorter than
     64    * this, we just copy their bytes on {@link #concat(ByteString)}.
     65    * The trade-off is copy size versus the overhead of creating tree nodes
     66    * in {@link RopeByteString}.
     67    */
     68   static final int CONCATENATE_BY_COPY_SIZE = 128;
     69 
     70   /**
     71    * When copying an InputStream into a ByteString with .readFrom(),
     72    * the chunks in the underlying rope start at 256 bytes, but double
     73    * each iteration up to 8192 bytes.
     74    */
     75   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
     76   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
     77 
     78   /**
     79    * Empty {@code ByteString}.
     80    */
     81   public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
     82 
     83   // This constructor is here to prevent subclassing outside of this package,
     84   ByteString() {}
     85 
     86   /**
     87    * Gets the byte at the given index. This method should be used only for
     88    * random access to individual bytes. To access bytes sequentially, use the
     89    * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
     90    * #substring(int, int)} first if necessary.
     91    *
     92    * @param index index of byte
     93    * @return the value
     94    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
     95    */
     96   public abstract byte byteAt(int index);
     97 
     98   /**
     99    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
    100    * To avoid auto-boxing, you may get the iterator manually and call
    101    * {@link ByteIterator#nextByte()}.
    102    *
    103    * @return the iterator
    104    */
    105   public abstract ByteIterator iterator();
    106 
    107   /**
    108    * This interface extends {@code Iterator<Byte>}, so that we can return an
    109    * unboxed {@code byte}.
    110    */
    111   public interface ByteIterator extends Iterator<Byte> {
    112     /**
    113      * An alternative to {@link Iterator#next()} that returns an
    114      * unboxed primitive {@code byte}.
    115      *
    116      * @return the next {@code byte} in the iteration
    117      * @throws NoSuchElementException if the iteration has no more elements
    118      */
    119     byte nextByte();
    120   }
    121 
    122   /**
    123    * Gets the number of bytes.
    124    *
    125    * @return size in bytes
    126    */
    127   public abstract int size();
    128 
    129   /**
    130    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
    131    *
    132    * @return true if this is zero bytes long
    133    */
    134   public boolean isEmpty() {
    135     return size() == 0;
    136   }
    137 
    138   // =================================================================
    139   // ByteString -> substring
    140 
    141   /**
    142    * Return the substring from {@code beginIndex}, inclusive, to the end of the
    143    * string.
    144    *
    145    * @param beginIndex start at this index
    146    * @return substring sharing underlying data
    147    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
    148    *     {@code beginIndex > size()}.
    149    */
    150   public ByteString substring(int beginIndex) {
    151     return substring(beginIndex, size());
    152   }
    153 
    154   /**
    155    * Return the substring from {@code beginIndex}, inclusive, to {@code
    156    * endIndex}, exclusive.
    157    *
    158    * @param beginIndex start at this index
    159    * @param endIndex   the last character is the one before this index
    160    * @return substring sharing underlying data
    161    * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
    162    *     {@code endIndex > size()}, or {@code beginIndex > endIndex}.
    163    */
    164   public abstract ByteString substring(int beginIndex, int endIndex);
    165 
    166   /**
    167    * Tests if this bytestring starts with the specified prefix.
    168    * Similar to {@link String#startsWith(String)}
    169    *
    170    * @param prefix the prefix.
    171    * @return <code>true</code> if the byte sequence represented by the
    172    *         argument is a prefix of the byte sequence represented by
    173    *         this string; <code>false</code> otherwise.
    174    */
    175   public boolean startsWith(ByteString prefix) {
    176     return size() >= prefix.size() &&
    177            substring(0, prefix.size()).equals(prefix);
    178   }
    179 
    180   // =================================================================
    181   // byte[] -> ByteString
    182 
    183   /**
    184    * Copies the given bytes into a {@code ByteString}.
    185    *
    186    * @param bytes source array
    187    * @param offset offset in source array
    188    * @param size number of bytes to copy
    189    * @return new {@code ByteString}
    190    */
    191   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
    192     byte[] copy = new byte[size];
    193     System.arraycopy(bytes, offset, copy, 0, size);
    194     return new LiteralByteString(copy);
    195   }
    196 
    197   /**
    198    * Copies the given bytes into a {@code ByteString}.
    199    *
    200    * @param bytes to copy
    201    * @return new {@code ByteString}
    202    */
    203   public static ByteString copyFrom(byte[] bytes) {
    204     return copyFrom(bytes, 0, bytes.length);
    205   }
    206 
    207   /**
    208    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
    209    * a {@code ByteString}.
    210    *
    211    * @param bytes source buffer
    212    * @param size number of bytes to copy
    213    * @return new {@code ByteString}
    214    */
    215   public static ByteString copyFrom(ByteBuffer bytes, int size) {
    216     byte[] copy = new byte[size];
    217     bytes.get(copy);
    218     return new LiteralByteString(copy);
    219   }
    220 
    221   /**
    222    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
    223    * a {@code ByteString}.
    224    *
    225    * @param bytes sourceBuffer
    226    * @return new {@code ByteString}
    227    */
    228   public static ByteString copyFrom(ByteBuffer bytes) {
    229     return copyFrom(bytes, bytes.remaining());
    230   }
    231 
    232   /**
    233    * Encodes {@code text} into a sequence of bytes using the named charset
    234    * and returns the result as a {@code ByteString}.
    235    *
    236    * @param text source string
    237    * @param charsetName encoding to use
    238    * @return new {@code ByteString}
    239    * @throws UnsupportedEncodingException if the encoding isn't found
    240    */
    241   public static ByteString copyFrom(String text, String charsetName)
    242       throws UnsupportedEncodingException {
    243     return new LiteralByteString(text.getBytes(charsetName));
    244   }
    245 
    246   /**
    247    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
    248    * result as a {@code ByteString}.
    249    *
    250    * @param text source string
    251    * @return new {@code ByteString}
    252    */
    253   public static ByteString copyFromUtf8(String text) {
    254     try {
    255       return new LiteralByteString(text.getBytes("UTF-8"));
    256     } catch (UnsupportedEncodingException e) {
    257       throw new RuntimeException("UTF-8 not supported?", e);
    258     }
    259   }
    260 
    261   // =================================================================
    262   // InputStream -> ByteString
    263 
    264   /**
    265    * Completely reads the given stream's bytes into a
    266    * {@code ByteString}, blocking if necessary until all bytes are
    267    * read through to the end of the stream.
    268    *
    269    * <b>Performance notes:</b> The returned {@code ByteString} is an
    270    * immutable tree of byte arrays ("chunks") of the stream data.  The
    271    * first chunk is small, with subsequent chunks each being double
    272    * the size, up to 8K.  If the caller knows the precise length of
    273    * the stream and wishes to avoid all unnecessary copies and
    274    * allocations, consider using the two-argument version of this
    275    * method, below.
    276    *
    277    * @param streamToDrain The source stream, which is read completely
    278    *     but not closed.
    279    * @return A new {@code ByteString} which is made up of chunks of
    280    *     various sizes, depending on the behavior of the underlying
    281    *     stream.
    282    * @throws IOException IOException is thrown if there is a problem
    283    *     reading the underlying stream.
    284    */
    285   public static ByteString readFrom(InputStream streamToDrain)
    286       throws IOException {
    287     return readFrom(
    288         streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
    289   }
    290 
    291   /**
    292    * Completely reads the given stream's bytes into a
    293    * {@code ByteString}, blocking if necessary until all bytes are
    294    * read through to the end of the stream.
    295    *
    296    * <b>Performance notes:</b> The returned {@code ByteString} is an
    297    * immutable tree of byte arrays ("chunks") of the stream data.  The
    298    * chunkSize parameter sets the size of these byte arrays. In
    299    * particular, if the chunkSize is precisely the same as the length
    300    * of the stream, unnecessary allocations and copies will be
    301    * avoided. Otherwise, the chunks will be of the given size, except
    302    * for the last chunk, which will be resized (via a reallocation and
    303    * copy) to contain the remainder of the stream.
    304    *
    305    * @param streamToDrain The source stream, which is read completely
    306    *     but not closed.
    307    * @param chunkSize The size of the chunks in which to read the
    308    *     stream.
    309    * @return A new {@code ByteString} which is made up of chunks of
    310    *     the given size.
    311    * @throws IOException IOException is thrown if there is a problem
    312    *     reading the underlying stream.
    313    */
    314   public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
    315       throws IOException {
    316     return readFrom(streamToDrain, chunkSize, chunkSize);
    317   }
    318 
    319   // Helper method that takes the chunk size range as a parameter.
    320   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
    321       int maxChunkSize) throws IOException {
    322     Collection<ByteString> results = new ArrayList<ByteString>();
    323 
    324     // copy the inbound bytes into a list of chunks; the chunk size
    325     // grows exponentially to support both short and long streams.
    326     int chunkSize = minChunkSize;
    327     while (true) {
    328       ByteString chunk = readChunk(streamToDrain, chunkSize);
    329       if (chunk == null) {
    330         break;
    331       }
    332       results.add(chunk);
    333       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
    334     }
    335 
    336     return ByteString.copyFrom(results);
    337   }
    338 
    339   /**
    340    * Blocks until a chunk of the given size can be made from the
    341    * stream, or EOF is reached.  Calls read() repeatedly in case the
    342    * given stream implementation doesn't completely fill the given
    343    * buffer in one read() call.
    344    *
    345    * @return A chunk of the desired size, or else a chunk as large as
    346    * was available when end of stream was reached. Returns null if the
    347    * given stream had no more data in it.
    348    */
    349   private static ByteString readChunk(InputStream in, final int chunkSize)
    350       throws IOException {
    351       final byte[] buf = new byte[chunkSize];
    352       int bytesRead = 0;
    353       while (bytesRead < chunkSize) {
    354         final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
    355         if (count == -1) {
    356           break;
    357         }
    358         bytesRead += count;
    359       }
    360 
    361       if (bytesRead == 0) {
    362         return null;
    363       } else {
    364         return ByteString.copyFrom(buf, 0, bytesRead);
    365       }
    366   }
    367 
    368   // =================================================================
    369   // Multiple ByteStrings -> One ByteString
    370 
    371   /**
    372    * Concatenate the given {@code ByteString} to this one. Short concatenations,
    373    * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
    374    * produced by copying the underlying bytes (as per Rope.java, <a
    375    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
    376    * BAP95 </a>. In general, the concatenate involves no copying.
    377    *
    378    * @param other string to concatenate
    379    * @return a new {@code ByteString} instance
    380    */
    381   public ByteString concat(ByteString other) {
    382     int thisSize = size();
    383     int otherSize = other.size();
    384     if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
    385       throw new IllegalArgumentException("ByteString would be too long: " +
    386                                          thisSize + "+" + otherSize);
    387     }
    388 
    389     return RopeByteString.concatenate(this, other);
    390   }
    391 
    392   /**
    393    * Concatenates all byte strings in the iterable and returns the result.
    394    * This is designed to run in O(list size), not O(total bytes).
    395    *
    396    * <p>The returned {@code ByteString} is not necessarily a unique object.
    397    * If the list is empty, the returned object is the singleton empty
    398    * {@code ByteString}.  If the list has only one element, that
    399    * {@code ByteString} will be returned without copying.
    400    *
    401    * @param byteStrings strings to be concatenated
    402    * @return new {@code ByteString}
    403    */
    404   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
    405     Collection<ByteString> collection;
    406     if (!(byteStrings instanceof Collection)) {
    407       collection = new ArrayList<ByteString>();
    408       for (ByteString byteString : byteStrings) {
    409         collection.add(byteString);
    410       }
    411     } else {
    412       collection = (Collection<ByteString>) byteStrings;
    413     }
    414     ByteString result;
    415     if (collection.isEmpty()) {
    416       result = EMPTY;
    417     } else {
    418       result = balancedConcat(collection.iterator(), collection.size());
    419     }
    420     return result;
    421   }
    422 
    423   // Internal function used by copyFrom(Iterable<ByteString>).
    424   // Create a balanced concatenation of the next "length" elements from the
    425   // iterable.
    426   private static ByteString balancedConcat(Iterator<ByteString> iterator,
    427       int length) {
    428     assert length >= 1;
    429     ByteString result;
    430     if (length == 1) {
    431       result = iterator.next();
    432     } else {
    433       int halfLength = length >>> 1;
    434       ByteString left = balancedConcat(iterator, halfLength);
    435       ByteString right = balancedConcat(iterator, length - halfLength);
    436       result = left.concat(right);
    437     }
    438     return result;
    439   }
    440 
    441   // =================================================================
    442   // ByteString -> byte[]
    443 
    444   /**
    445    * Copies bytes into a buffer at the given offset.
    446    *
    447    * @param target buffer to copy into
    448    * @param offset in the target buffer
    449    * @throws IndexOutOfBoundsException if the offset is negative or too large
    450    */
    451   public void copyTo(byte[] target, int offset) {
    452     copyTo(target, 0, offset, size());
    453   }
    454 
    455   /**
    456    * Copies bytes into a buffer.
    457    *
    458    * @param target       buffer to copy into
    459    * @param sourceOffset offset within these bytes
    460    * @param targetOffset offset within the target buffer
    461    * @param numberToCopy number of bytes to copy
    462    * @throws IndexOutOfBoundsException if an offset or size is negative or too
    463    *     large
    464    */
    465   public void copyTo(byte[] target, int sourceOffset, int targetOffset,
    466       int numberToCopy) {
    467     if (sourceOffset < 0) {
    468       throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
    469     }
    470     if (targetOffset < 0) {
    471       throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
    472     }
    473     if (numberToCopy < 0) {
    474       throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
    475     }
    476     if (sourceOffset + numberToCopy > size()) {
    477       throw new IndexOutOfBoundsException(
    478           "Source end offset < 0: " + (sourceOffset + numberToCopy));
    479     }
    480     if (targetOffset + numberToCopy > target.length) {
    481       throw new IndexOutOfBoundsException(
    482           "Target end offset < 0: " + (targetOffset + numberToCopy));
    483     }
    484     if (numberToCopy > 0) {
    485       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
    486     }
    487   }
    488 
    489   /**
    490    * Internal (package private) implementation of
    491    * @link{#copyTo(byte[],int,int,int}.
    492    * It assumes that all error checking has already been performed and that
    493    * @code{numberToCopy > 0}.
    494    */
    495   protected abstract void copyToInternal(byte[] target, int sourceOffset,
    496       int targetOffset, int numberToCopy);
    497 
    498   /**
    499    * Copies bytes into a ByteBuffer.
    500    *
    501    * @param target ByteBuffer to copy into.
    502    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
    503    * @throws java.nio.BufferOverflowException if the {@code target}'s
    504    *     remaining() space is not large enough to hold the data.
    505    */
    506   public abstract void copyTo(ByteBuffer target);
    507 
    508   /**
    509    * Copies bytes to a {@code byte[]}.
    510    *
    511    * @return copied bytes
    512    */
    513   public byte[] toByteArray() {
    514     int size = size();
    515     byte[] result = new byte[size];
    516     copyToInternal(result, 0, 0, size);
    517     return result;
    518   }
    519 
    520   /**
    521    * Writes the complete contents of this byte string to
    522    * the specified output stream argument.
    523    *
    524    * @param  out  the output stream to which to write the data.
    525    * @throws IOException  if an I/O error occurs.
    526    */
    527   public abstract void writeTo(OutputStream out) throws IOException;
    528 
    529   /**
    530    * Constructs a read-only {@code java.nio.ByteBuffer} whose content
    531    * is equal to the contents of this byte string.
    532    * The result uses the same backing array as the byte string, if possible.
    533    *
    534    * @return wrapped bytes
    535    */
    536   public abstract ByteBuffer asReadOnlyByteBuffer();
    537 
    538   /**
    539    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
    540    * such that the concatenation of their contents is equal to the contents
    541    * of this byte string.  The result uses the same backing arrays as the
    542    * byte string.
    543    * <p>
    544    * By returning a list, implementations of this method may be able to avoid
    545    * copying even when there are multiple backing arrays.
    546    *
    547    * @return a list of wrapped bytes
    548    */
    549   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
    550 
    551   /**
    552    * Constructs a new {@code String} by decoding the bytes using the
    553    * specified charset.
    554    *
    555    * @param charsetName encode using this charset
    556    * @return new string
    557    * @throws UnsupportedEncodingException if charset isn't recognized
    558    */
    559   public abstract String toString(String charsetName)
    560       throws UnsupportedEncodingException;
    561 
    562   // =================================================================
    563   // UTF-8 decoding
    564 
    565   /**
    566    * Constructs a new {@code String} by decoding the bytes as UTF-8.
    567    *
    568    * @return new string using UTF-8 encoding
    569    */
    570   public String toStringUtf8() {
    571     try {
    572       return toString("UTF-8");
    573     } catch (UnsupportedEncodingException e) {
    574       throw new RuntimeException("UTF-8 not supported?", e);
    575     }
    576   }
    577 
    578   /**
    579    * Tells whether this {@code ByteString} represents a well-formed UTF-8
    580    * byte sequence, such that the original bytes can be converted to a
    581    * String object and then round tripped back to bytes without loss.
    582    *
    583    * <p>More precisely, returns {@code true} whenever: <pre> {@code
    584    * Arrays.equals(byteString.toByteArray(),
    585    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
    586    * }</pre>
    587    *
    588    * <p>This method returns {@code false} for "overlong" byte sequences,
    589    * as well as for 3-byte sequences that would map to a surrogate
    590    * character, in accordance with the restricted definition of UTF-8
    591    * introduced in Unicode 3.1.  Note that the UTF-8 decoder included in
    592    * Oracle's JDK has been modified to also reject "overlong" byte
    593    * sequences, but (as of 2011) still accepts 3-byte surrogate
    594    * character byte sequences.
    595    *
    596    * <p>See the Unicode Standard,</br>
    597    * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
    598    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
    599    *
    600    * @return whether the bytes in this {@code ByteString} are a
    601    * well-formed UTF-8 byte sequence
    602    */
    603   public abstract boolean isValidUtf8();
    604 
    605   /**
    606    * Tells whether the given byte sequence is a well-formed, malformed, or
    607    * incomplete UTF-8 byte sequence.  This method accepts and returns a partial
    608    * state result, allowing the bytes for a complete UTF-8 byte sequence to be
    609    * composed from multiple {@code ByteString} segments.
    610    *
    611    * @param state either {@code 0} (if this is the initial decoding operation)
    612    *     or the value returned from a call to a partial decoding method for the
    613    *     previous bytes
    614    * @param offset offset of the first byte to check
    615    * @param length number of bytes to check
    616    *
    617    * @return {@code -1} if the partial byte sequence is definitely malformed,
    618    * {@code 0} if it is well-formed (no additional input needed), or, if the
    619    * byte sequence is "incomplete", i.e. apparently terminated in the middle of
    620    * a character, an opaque integer "state" value containing enough information
    621    * to decode the character when passed to a subsequent invocation of a
    622    * partial decoding method.
    623    */
    624   protected abstract int partialIsValidUtf8(int state, int offset, int length);
    625 
    626   // =================================================================
    627   // equals() and hashCode()
    628 
    629   @Override
    630   public abstract boolean equals(Object o);
    631 
    632   /**
    633    * Return a non-zero hashCode depending only on the sequence of bytes
    634    * in this ByteString.
    635    *
    636    * @return hashCode value for this object
    637    */
    638   @Override
    639   public abstract int hashCode();
    640 
    641   // =================================================================
    642   // Input stream
    643 
    644   /**
    645    * Creates an {@code InputStream} which can be used to read the bytes.
    646    * <p>
    647    * The {@link InputStream} returned by this method is guaranteed to be
    648    * completely non-blocking.  The method {@link InputStream#available()}
    649    * returns the number of bytes remaining in the stream. The methods
    650    * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
    651    * and {@link InputStream#skip(long)} will read/skip as many bytes as are
    652    * available.
    653    * <p>
    654    * The methods in the returned {@link InputStream} might <b>not</b> be
    655    * thread safe.
    656    *
    657    * @return an input stream that returns the bytes of this byte string.
    658    */
    659   public abstract InputStream newInput();
    660 
    661   /**
    662    * Creates a {@link CodedInputStream} which can be used to read the bytes.
    663    * Using this is often more efficient than creating a {@link CodedInputStream}
    664    * that wraps the result of {@link #newInput()}.
    665    *
    666    * @return stream based on wrapped data
    667    */
    668   public abstract CodedInputStream newCodedInput();
    669 
    670   // =================================================================
    671   // Output stream
    672 
    673   /**
    674    * Creates a new {@link Output} with the given initial capacity. Call {@link
    675    * Output#toByteString()} to create the {@code ByteString} instance.
    676    * <p>
    677    * A {@link ByteString.Output} offers the same functionality as a
    678    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
    679    * rather than a {@code byte} array.
    680    *
    681    * @param initialCapacity estimate of number of bytes to be written
    682    * @return {@code OutputStream} for building a {@code ByteString}
    683    */
    684   public static Output newOutput(int initialCapacity) {
    685     return new Output(initialCapacity);
    686   }
    687 
    688   /**
    689    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
    690    * the {@code ByteString} instance.
    691    * <p>
    692    * A {@link ByteString.Output} offers the same functionality as a
    693    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
    694    * rather than a {@code byte array}.
    695    *
    696    * @return {@code OutputStream} for building a {@code ByteString}
    697    */
    698   public static Output newOutput() {
    699     return new Output(CONCATENATE_BY_COPY_SIZE);
    700   }
    701 
    702   /**
    703    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
    704    * create the {@code ByteString} instance.
    705    */
    706   public static final class Output extends OutputStream {
    707     // Implementation note.
    708     // The public methods of this class must be synchronized.  ByteStrings
    709     // are guaranteed to be immutable.  Without some sort of locking, it could
    710     // be possible for one thread to call toByteSring(), while another thread
    711     // is still modifying the underlying byte array.
    712 
    713     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
    714     // argument passed by user, indicating initial capacity.
    715     private final int initialCapacity;
    716     // ByteStrings to be concatenated to create the result
    717     private final ArrayList<ByteString> flushedBuffers;
    718     // Total number of bytes in the ByteStrings of flushedBuffers
    719     private int flushedBuffersTotalBytes;
    720     // Current buffer to which we are writing
    721     private byte[] buffer;
    722     // Location in buffer[] to which we write the next byte.
    723     private int bufferPos;
    724 
    725     /**
    726      * Creates a new ByteString output stream with the specified
    727      * initial capacity.
    728      *
    729      * @param initialCapacity  the initial capacity of the output stream.
    730      */
    731     Output(int initialCapacity) {
    732       if (initialCapacity < 0) {
    733         throw new IllegalArgumentException("Buffer size < 0");
    734       }
    735       this.initialCapacity = initialCapacity;
    736       this.flushedBuffers = new ArrayList<ByteString>();
    737       this.buffer = new byte[initialCapacity];
    738     }
    739 
    740     @Override
    741     public synchronized void write(int b) {
    742       if (bufferPos == buffer.length) {
    743         flushFullBuffer(1);
    744       }
    745       buffer[bufferPos++] = (byte)b;
    746     }
    747 
    748     @Override
    749     public synchronized void write(byte[] b, int offset, int length)  {
    750       if (length <= buffer.length - bufferPos) {
    751         // The bytes can fit into the current buffer.
    752         System.arraycopy(b, offset, buffer, bufferPos, length);
    753         bufferPos += length;
    754       } else {
    755         // Use up the current buffer
    756         int copySize  = buffer.length - bufferPos;
    757         System.arraycopy(b, offset, buffer, bufferPos, copySize);
    758         offset += copySize;
    759         length -= copySize;
    760         // Flush the buffer, and get a new buffer at least big enough to cover
    761         // what we still need to output
    762         flushFullBuffer(length);
    763         System.arraycopy(b, offset, buffer, 0 /* count */, length);
    764         bufferPos = length;
    765       }
    766     }
    767 
    768     /**
    769      * Creates a byte string. Its size is the current size of this output
    770      * stream and its output has been copied to it.
    771      *
    772      * @return  the current contents of this output stream, as a byte string.
    773      */
    774     public synchronized ByteString toByteString() {
    775       flushLastBuffer();
    776       return ByteString.copyFrom(flushedBuffers);
    777     }
    778 
    779     /**
    780      * Implement java.util.Arrays.copyOf() for jdk 1.5.
    781      */
    782     private byte[] copyArray(byte[] buffer, int length) {
    783       byte[] result = new byte[length];
    784       System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length));
    785       return result;
    786     }
    787 
    788     /**
    789      * Writes the complete contents of this byte array output stream to
    790      * the specified output stream argument.
    791      *
    792      * @param out the output stream to which to write the data.
    793      * @throws IOException  if an I/O error occurs.
    794      */
    795     public void writeTo(OutputStream out) throws IOException {
    796       ByteString[] cachedFlushBuffers;
    797       byte[] cachedBuffer;
    798       int cachedBufferPos;
    799       synchronized (this) {
    800         // Copy the information we need into local variables so as to hold
    801         // the lock for as short a time as possible.
    802         cachedFlushBuffers =
    803             flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
    804         cachedBuffer = buffer;
    805         cachedBufferPos = bufferPos;
    806       }
    807       for (ByteString byteString : cachedFlushBuffers) {
    808         byteString.writeTo(out);
    809       }
    810 
    811       out.write(copyArray(cachedBuffer, cachedBufferPos));
    812     }
    813 
    814     /**
    815      * Returns the current size of the output stream.
    816      *
    817      * @return  the current size of the output stream
    818      */
    819     public synchronized int size() {
    820       return flushedBuffersTotalBytes + bufferPos;
    821     }
    822 
    823     /**
    824      * Resets this stream, so that all currently accumulated output in the
    825      * output stream is discarded. The output stream can be used again,
    826      * reusing the already allocated buffer space.
    827      */
    828     public synchronized void reset() {
    829       flushedBuffers.clear();
    830       flushedBuffersTotalBytes = 0;
    831       bufferPos = 0;
    832     }
    833 
    834     @Override
    835     public String toString() {
    836       return String.format("<ByteString.Output@%s size=%d>",
    837           Integer.toHexString(System.identityHashCode(this)), size());
    838     }
    839 
    840     /**
    841      * Internal function used by writers.  The current buffer is full, and the
    842      * writer needs a new buffer whose size is at least the specified minimum
    843      * size.
    844      */
    845     private void flushFullBuffer(int minSize)  {
    846       flushedBuffers.add(new LiteralByteString(buffer));
    847       flushedBuffersTotalBytes += buffer.length;
    848       // We want to increase our total capacity by 50%, but as a minimum,
    849       // the new buffer should also at least be >= minSize and
    850       // >= initial Capacity.
    851       int newSize = Math.max(initialCapacity,
    852           Math.max(minSize, flushedBuffersTotalBytes >>> 1));
    853       buffer = new byte[newSize];
    854       bufferPos = 0;
    855     }
    856 
    857     /**
    858      * Internal function used by {@link #toByteString()}. The current buffer may
    859      * or may not be full, but it needs to be flushed.
    860      */
    861     private void flushLastBuffer()  {
    862       if (bufferPos < buffer.length) {
    863         if (bufferPos > 0) {
    864           byte[] bufferCopy = copyArray(buffer, bufferPos);
    865           flushedBuffers.add(new LiteralByteString(bufferCopy));
    866         }
    867         // We reuse this buffer for further writes.
    868       } else {
    869         // Buffer is completely full.  Huzzah.
    870         flushedBuffers.add(new LiteralByteString(buffer));
    871         // 99% of the time, we're not going to use this OutputStream again.
    872         // We set buffer to an empty byte stream so that we're handling this
    873         // case without wasting space.  In the rare case that more writes
    874         // *do* occur, this empty buffer will be flushed and an appropriately
    875         // sized new buffer will be created.
    876         buffer = EMPTY_BYTE_ARRAY;
    877       }
    878       flushedBuffersTotalBytes += bufferPos;
    879       bufferPos = 0;
    880     }
    881   }
    882 
    883   /**
    884    * Constructs a new {@code ByteString} builder, which allows you to
    885    * efficiently construct a {@code ByteString} by writing to a {@link
    886    * CodedOutputStream}. Using this is much more efficient than calling {@code
    887    * newOutput()} and wrapping that in a {@code CodedOutputStream}.
    888    *
    889    * <p>This is package-private because it's a somewhat confusing interface.
    890    * Users can call {@link Message#toByteString()} instead of calling this
    891    * directly.
    892    *
    893    * @param size The target byte size of the {@code ByteString}.  You must write
    894    *     exactly this many bytes before building the result.
    895    * @return the builder
    896    */
    897   static CodedBuilder newCodedBuilder(int size) {
    898     return new CodedBuilder(size);
    899   }
    900 
    901   /** See {@link ByteString#newCodedBuilder(int)}. */
    902   static final class CodedBuilder {
    903     private final CodedOutputStream output;
    904     private final byte[] buffer;
    905 
    906     private CodedBuilder(int size) {
    907       buffer = new byte[size];
    908       output = CodedOutputStream.newInstance(buffer);
    909     }
    910 
    911     public ByteString build() {
    912       output.checkNoSpaceLeft();
    913 
    914       // We can be confident that the CodedOutputStream will not modify the
    915       // underlying bytes anymore because it already wrote all of them.  So,
    916       // no need to make a copy.
    917       return new LiteralByteString(buffer);
    918     }
    919 
    920     public CodedOutputStream getCodedOutput() {
    921       return output;
    922     }
    923   }
    924 
    925   // =================================================================
    926   // Methods {@link RopeByteString} needs on instances, which aren't part of the
    927   // public API.
    928 
    929   /**
    930    * Return the depth of the tree representing this {@code ByteString}, if any,
    931    * whose root is this node. If this is a leaf node, return 0.
    932    *
    933    * @return tree depth or zero
    934    */
    935   protected abstract int getTreeDepth();
    936 
    937   /**
    938    * Return {@code true} if this ByteString is literal (a leaf node) or a
    939    * flat-enough tree in the sense of {@link RopeByteString}.
    940    *
    941    * @return true if the tree is flat enough
    942    */
    943   protected abstract boolean isBalanced();
    944 
    945   /**
    946    * Return the cached hash code if available.
    947    *
    948    * @return value of cached hash code or 0 if not computed yet
    949    */
    950   protected abstract int peekCachedHashCode();
    951 
    952   /**
    953    * Compute the hash across the value bytes starting with the given hash, and
    954    * return the result.  This is used to compute the hash across strings
    955    * represented as a set of pieces by allowing the hash computation to be
    956    * continued from piece to piece.
    957    *
    958    * @param h starting hash value
    959    * @param offset offset into this value to start looking at data values
    960    * @param length number of data values to include in the hash computation
    961    * @return ending hash value
    962    */
    963   protected abstract int partialHash(int h, int offset, int length);
    964 
    965   @Override
    966   public String toString() {
    967     return String.format("<ByteString@%s size=%d>",
    968         Integer.toHexString(System.identityHashCode(this)), size());
    969   }
    970 }
    971