Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 package com.google.protobuf;
     32 
     33 import java.io.ByteArrayOutputStream;
     34 import java.io.IOException;
     35 import java.io.InputStream;
     36 import java.io.OutputStream;
     37 import java.io.UnsupportedEncodingException;
     38 import java.nio.ByteBuffer;
     39 import java.util.ArrayList;
     40 import java.util.Arrays;
     41 import java.util.Collection;
     42 import java.util.Iterator;
     43 import java.util.List;
     44 import java.util.NoSuchElementException;
     45 
     46 /**
     47  * Immutable sequence of bytes.  Substring is supported by sharing the reference
     48  * to the immutable underlying bytes, as with {@link String}.  Concatenation is
     49  * likewise supported without copying (long strings) by building a tree of
     50  * pieces in {@link RopeByteString}.
     51  * <p>
     52  * Like {@link String}, the contents of a {@link ByteString} can never be
     53  * observed to change, not even in the presence of a data race or incorrect
     54  * API usage in the client code.
     55  *
     56  * @author crazybob (at) google.com Bob Lee
     57  * @author kenton (at) google.com Kenton Varda
     58  * @author carlanton (at) google.com Carl Haverl
     59  * @author martinrb (at) google.com Martin Buchholz
     60  */
     61 public abstract class ByteString implements Iterable<Byte> {
     62 
     63   /**
     64    * When two strings to be concatenated have a combined length shorter than
     65    * this, we just copy their bytes on {@link #concat(ByteString)}.
     66    * The trade-off is copy size versus the overhead of creating tree nodes
     67    * in {@link RopeByteString}.
     68    */
     69   static final int CONCATENATE_BY_COPY_SIZE = 128;
     70 
     71   /**
     72    * When copying an InputStream into a ByteString with .readFrom(),
     73    * the chunks in the underlying rope start at 256 bytes, but double
     74    * each iteration up to 8192 bytes.
     75    */
     76   static final int MIN_READ_FROM_CHUNK_SIZE = 0x100;  // 256b
     77   static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000;  // 8k
     78 
     79   /**
     80    * Empty {@code ByteString}.
     81    */
     82   public static final ByteString EMPTY = new LiteralByteString(new byte[0]);
     83 
     84   // This constructor is here to prevent subclassing outside of this package,
     85   ByteString() {}
     86 
     87   /**
     88    * Gets the byte at the given index. This method should be used only for
     89    * random access to individual bytes. To access bytes sequentially, use the
     90    * {@link ByteIterator} returned by {@link #iterator()}, and call {@link
     91    * #substring(int, int)} first if necessary.
     92    *
     93    * @param index index of byte
     94    * @return the value
     95    * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size
     96    */
     97   public abstract byte byteAt(int index);
     98 
     99   /**
    100    * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString.
    101    * To avoid auto-boxing, you may get the iterator manually and call
    102    * {@link ByteIterator#nextByte()}.
    103    *
    104    * @return the iterator
    105    */
    106   public abstract ByteIterator iterator();
    107 
    108   /**
    109    * This interface extends {@code Iterator<Byte>}, so that we can return an
    110    * unboxed {@code byte}.
    111    */
    112   public interface ByteIterator extends Iterator<Byte> {
    113     /**
    114      * An alternative to {@link Iterator#next()} that returns an
    115      * unboxed primitive {@code byte}.
    116      *
    117      * @return the next {@code byte} in the iteration
    118      * @throws NoSuchElementException if the iteration has no more elements
    119      */
    120     byte nextByte();
    121   }
    122 
    123   /**
    124    * Gets the number of bytes.
    125    *
    126    * @return size in bytes
    127    */
    128   public abstract int size();
    129 
    130   /**
    131    * Returns {@code true} if the size is {@code 0}, {@code false} otherwise.
    132    *
    133    * @return true if this is zero bytes long
    134    */
    135   public boolean isEmpty() {
    136     return size() == 0;
    137   }
    138 
    139   // =================================================================
    140   // ByteString -> substring
    141 
    142   /**
    143    * Return the substring from {@code beginIndex}, inclusive, to the end of the
    144    * string.
    145    *
    146    * @param beginIndex start at this index
    147    * @return substring sharing underlying data
    148    * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or
    149    *     {@code beginIndex > size()}.
    150    */
    151   public ByteString substring(int beginIndex) {
    152     return substring(beginIndex, size());
    153   }
    154 
    155   /**
    156    * Return the substring from {@code beginIndex}, inclusive, to {@code
    157    * endIndex}, exclusive.
    158    *
    159    * @param beginIndex start at this index
    160    * @param endIndex   the last character is the one before this index
    161    * @return substring sharing underlying data
    162    * @throws IndexOutOfBoundsException if {@code beginIndex < 0},
    163    *     {@code endIndex > size()}, or {@code beginIndex > endIndex}.
    164    */
    165   public abstract ByteString substring(int beginIndex, int endIndex);
    166 
    167   /**
    168    * Tests if this bytestring starts with the specified prefix.
    169    * Similar to {@link String#startsWith(String)}
    170    *
    171    * @param prefix the prefix.
    172    * @return <code>true</code> if the byte sequence represented by the
    173    *         argument is a prefix of the byte sequence represented by
    174    *         this string; <code>false</code> otherwise.
    175    */
    176   public boolean startsWith(ByteString prefix) {
    177     return size() >= prefix.size() &&
    178            substring(0, prefix.size()).equals(prefix);
    179   }
    180 
    181   // =================================================================
    182   // byte[] -> ByteString
    183 
    184   /**
    185    * Copies the given bytes into a {@code ByteString}.
    186    *
    187    * @param bytes source array
    188    * @param offset offset in source array
    189    * @param size number of bytes to copy
    190    * @return new {@code ByteString}
    191    */
    192   public static ByteString copyFrom(byte[] bytes, int offset, int size) {
    193     byte[] copy = new byte[size];
    194     System.arraycopy(bytes, offset, copy, 0, size);
    195     return new LiteralByteString(copy);
    196   }
    197 
    198   /**
    199    * Copies the given bytes into a {@code ByteString}.
    200    *
    201    * @param bytes to copy
    202    * @return new {@code ByteString}
    203    */
    204   public static ByteString copyFrom(byte[] bytes) {
    205     return copyFrom(bytes, 0, bytes.length);
    206   }
    207 
    208   /**
    209    * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into
    210    * a {@code ByteString}.
    211    *
    212    * @param bytes source buffer
    213    * @param size number of bytes to copy
    214    * @return new {@code ByteString}
    215    */
    216   public static ByteString copyFrom(ByteBuffer bytes, int size) {
    217     byte[] copy = new byte[size];
    218     bytes.get(copy);
    219     return new LiteralByteString(copy);
    220   }
    221 
    222   /**
    223    * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into
    224    * a {@code ByteString}.
    225    *
    226    * @param bytes sourceBuffer
    227    * @return new {@code ByteString}
    228    */
    229   public static ByteString copyFrom(ByteBuffer bytes) {
    230     return copyFrom(bytes, bytes.remaining());
    231   }
    232 
    233   /**
    234    * Encodes {@code text} into a sequence of bytes using the named charset
    235    * and returns the result as a {@code ByteString}.
    236    *
    237    * @param text source string
    238    * @param charsetName encoding to use
    239    * @return new {@code ByteString}
    240    * @throws UnsupportedEncodingException if the encoding isn't found
    241    */
    242   public static ByteString copyFrom(String text, String charsetName)
    243       throws UnsupportedEncodingException {
    244     return new LiteralByteString(text.getBytes(charsetName));
    245   }
    246 
    247   /**
    248    * Encodes {@code text} into a sequence of UTF-8 bytes and returns the
    249    * result as a {@code ByteString}.
    250    *
    251    * @param text source string
    252    * @return new {@code ByteString}
    253    */
    254   public static ByteString copyFromUtf8(String text) {
    255     try {
    256       return new LiteralByteString(text.getBytes("UTF-8"));
    257     } catch (UnsupportedEncodingException e) {
    258       throw new RuntimeException("UTF-8 not supported?", e);
    259     }
    260   }
    261 
    262   // =================================================================
    263   // InputStream -> ByteString
    264 
    265   /**
    266    * Completely reads the given stream's bytes into a
    267    * {@code ByteString}, blocking if necessary until all bytes are
    268    * read through to the end of the stream.
    269    *
    270    * <b>Performance notes:</b> The returned {@code ByteString} is an
    271    * immutable tree of byte arrays ("chunks") of the stream data.  The
    272    * first chunk is small, with subsequent chunks each being double
    273    * the size, up to 8K.  If the caller knows the precise length of
    274    * the stream and wishes to avoid all unnecessary copies and
    275    * allocations, consider using the two-argument version of this
    276    * method, below.
    277    *
    278    * @param streamToDrain The source stream, which is read completely
    279    *     but not closed.
    280    * @return A new {@code ByteString} which is made up of chunks of
    281    *     various sizes, depending on the behavior of the underlying
    282    *     stream.
    283    * @throws IOException IOException is thrown if there is a problem
    284    *     reading the underlying stream.
    285    */
    286   public static ByteString readFrom(InputStream streamToDrain)
    287       throws IOException {
    288     return readFrom(
    289         streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE);
    290   }
    291 
    292   /**
    293    * Completely reads the given stream's bytes into a
    294    * {@code ByteString}, blocking if necessary until all bytes are
    295    * read through to the end of the stream.
    296    *
    297    * <b>Performance notes:</b> The returned {@code ByteString} is an
    298    * immutable tree of byte arrays ("chunks") of the stream data.  The
    299    * chunkSize parameter sets the size of these byte arrays. In
    300    * particular, if the chunkSize is precisely the same as the length
    301    * of the stream, unnecessary allocations and copies will be
    302    * avoided. Otherwise, the chunks will be of the given size, except
    303    * for the last chunk, which will be resized (via a reallocation and
    304    * copy) to contain the remainder of the stream.
    305    *
    306    * @param streamToDrain The source stream, which is read completely
    307    *     but not closed.
    308    * @param chunkSize The size of the chunks in which to read the
    309    *     stream.
    310    * @return A new {@code ByteString} which is made up of chunks of
    311    *     the given size.
    312    * @throws IOException IOException is thrown if there is a problem
    313    *     reading the underlying stream.
    314    */
    315   public static ByteString readFrom(InputStream streamToDrain, int chunkSize)
    316       throws IOException {
    317     return readFrom(streamToDrain, chunkSize, chunkSize);
    318   }
    319 
    320   // Helper method that takes the chunk size range as a parameter.
    321   public static ByteString readFrom(InputStream streamToDrain, int minChunkSize,
    322       int maxChunkSize) throws IOException {
    323     Collection<ByteString> results = new ArrayList<ByteString>();
    324 
    325     // copy the inbound bytes into a list of chunks; the chunk size
    326     // grows exponentially to support both short and long streams.
    327     int chunkSize = minChunkSize;
    328     while (true) {
    329       ByteString chunk = readChunk(streamToDrain, chunkSize);
    330       if (chunk == null) {
    331         break;
    332       }
    333       results.add(chunk);
    334       chunkSize = Math.min(chunkSize * 2, maxChunkSize);
    335     }
    336 
    337     return ByteString.copyFrom(results);
    338   }
    339 
    340   /**
    341    * Blocks until a chunk of the given size can be made from the
    342    * stream, or EOF is reached.  Calls read() repeatedly in case the
    343    * given stream implementation doesn't completely fill the given
    344    * buffer in one read() call.
    345    *
    346    * @return A chunk of the desired size, or else a chunk as large as
    347    * was available when end of stream was reached. Returns null if the
    348    * given stream had no more data in it.
    349    */
    350   private static ByteString readChunk(InputStream in, final int chunkSize)
    351       throws IOException {
    352       final byte[] buf = new byte[chunkSize];
    353       int bytesRead = 0;
    354       while (bytesRead < chunkSize) {
    355         final int count = in.read(buf, bytesRead, chunkSize - bytesRead);
    356         if (count == -1) {
    357           break;
    358         }
    359         bytesRead += count;
    360       }
    361 
    362       if (bytesRead == 0) {
    363         return null;
    364       } else {
    365         return ByteString.copyFrom(buf, 0, bytesRead);
    366       }
    367   }
    368 
    369   // =================================================================
    370   // Multiple ByteStrings -> One ByteString
    371 
    372   /**
    373    * Concatenate the given {@code ByteString} to this one. Short concatenations,
    374    * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are
    375    * produced by copying the underlying bytes (as per Rope.java, <a
    376    * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf">
    377    * BAP95 </a>. In general, the concatenate involves no copying.
    378    *
    379    * @param other string to concatenate
    380    * @return a new {@code ByteString} instance
    381    */
    382   public ByteString concat(ByteString other) {
    383     int thisSize = size();
    384     int otherSize = other.size();
    385     if ((long) thisSize + otherSize >= Integer.MAX_VALUE) {
    386       throw new IllegalArgumentException("ByteString would be too long: " +
    387                                          thisSize + "+" + otherSize);
    388     }
    389 
    390     return RopeByteString.concatenate(this, other);
    391   }
    392 
    393   /**
    394    * Concatenates all byte strings in the iterable and returns the result.
    395    * This is designed to run in O(list size), not O(total bytes).
    396    *
    397    * <p>The returned {@code ByteString} is not necessarily a unique object.
    398    * If the list is empty, the returned object is the singleton empty
    399    * {@code ByteString}.  If the list has only one element, that
    400    * {@code ByteString} will be returned without copying.
    401    *
    402    * @param byteStrings strings to be concatenated
    403    * @return new {@code ByteString}
    404    */
    405   public static ByteString copyFrom(Iterable<ByteString> byteStrings) {
    406     Collection<ByteString> collection;
    407     if (!(byteStrings instanceof Collection)) {
    408       collection = new ArrayList<ByteString>();
    409       for (ByteString byteString : byteStrings) {
    410         collection.add(byteString);
    411       }
    412     } else {
    413       collection = (Collection<ByteString>) byteStrings;
    414     }
    415     ByteString result;
    416     if (collection.isEmpty()) {
    417       result = EMPTY;
    418     } else {
    419       result = balancedConcat(collection.iterator(), collection.size());
    420     }
    421     return result;
    422   }
    423 
    424   // Internal function used by copyFrom(Iterable<ByteString>).
    425   // Create a balanced concatenation of the next "length" elements from the
    426   // iterable.
    427   private static ByteString balancedConcat(Iterator<ByteString> iterator,
    428       int length) {
    429     assert length >= 1;
    430     ByteString result;
    431     if (length == 1) {
    432       result = iterator.next();
    433     } else {
    434       int halfLength = length >>> 1;
    435       ByteString left = balancedConcat(iterator, halfLength);
    436       ByteString right = balancedConcat(iterator, length - halfLength);
    437       result = left.concat(right);
    438     }
    439     return result;
    440   }
    441 
    442   // =================================================================
    443   // ByteString -> byte[]
    444 
    445   /**
    446    * Copies bytes into a buffer at the given offset.
    447    *
    448    * @param target buffer to copy into
    449    * @param offset in the target buffer
    450    * @throws IndexOutOfBoundsException if the offset is negative or too large
    451    */
    452   public void copyTo(byte[] target, int offset) {
    453     copyTo(target, 0, offset, size());
    454   }
    455 
    456   /**
    457    * Copies bytes into a buffer.
    458    *
    459    * @param target       buffer to copy into
    460    * @param sourceOffset offset within these bytes
    461    * @param targetOffset offset within the target buffer
    462    * @param numberToCopy number of bytes to copy
    463    * @throws IndexOutOfBoundsException if an offset or size is negative or too
    464    *     large
    465    */
    466   public void copyTo(byte[] target, int sourceOffset, int targetOffset,
    467       int numberToCopy) {
    468     if (sourceOffset < 0) {
    469       throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset);
    470     }
    471     if (targetOffset < 0) {
    472       throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset);
    473     }
    474     if (numberToCopy < 0) {
    475       throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy);
    476     }
    477     if (sourceOffset + numberToCopy > size()) {
    478       throw new IndexOutOfBoundsException(
    479           "Source end offset < 0: " + (sourceOffset + numberToCopy));
    480     }
    481     if (targetOffset + numberToCopy > target.length) {
    482       throw new IndexOutOfBoundsException(
    483           "Target end offset < 0: " + (targetOffset + numberToCopy));
    484     }
    485     if (numberToCopy > 0) {
    486       copyToInternal(target, sourceOffset, targetOffset, numberToCopy);
    487     }
    488   }
    489 
    490   /**
    491    * Internal (package private) implementation of
    492    * @link{#copyTo(byte[],int,int,int}.
    493    * It assumes that all error checking has already been performed and that
    494    * @code{numberToCopy > 0}.
    495    */
    496   protected abstract void copyToInternal(byte[] target, int sourceOffset,
    497       int targetOffset, int numberToCopy);
    498 
    499   /**
    500    * Copies bytes into a ByteBuffer.
    501    *
    502    * @param target ByteBuffer to copy into.
    503    * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only
    504    * @throws java.nio.BufferOverflowException if the {@code target}'s
    505    *     remaining() space is not large enough to hold the data.
    506    */
    507   public abstract void copyTo(ByteBuffer target);
    508 
    509   /**
    510    * Copies bytes to a {@code byte[]}.
    511    *
    512    * @return copied bytes
    513    */
    514   public byte[] toByteArray() {
    515     int size = size();
    516     byte[] result = new byte[size];
    517     copyToInternal(result, 0, 0, size);
    518     return result;
    519   }
    520 
    521   /**
    522    * Writes the complete contents of this byte string to
    523    * the specified output stream argument.
    524    *
    525    * @param  out  the output stream to which to write the data.
    526    * @throws IOException  if an I/O error occurs.
    527    */
    528   public abstract void writeTo(OutputStream out) throws IOException;
    529 
    530   /**
    531    * Constructs a read-only {@code java.nio.ByteBuffer} whose content
    532    * is equal to the contents of this byte string.
    533    * The result uses the same backing array as the byte string, if possible.
    534    *
    535    * @return wrapped bytes
    536    */
    537   public abstract ByteBuffer asReadOnlyByteBuffer();
    538 
    539   /**
    540    * Constructs a list of read-only {@code java.nio.ByteBuffer} objects
    541    * such that the concatenation of their contents is equal to the contents
    542    * of this byte string.  The result uses the same backing arrays as the
    543    * byte string.
    544    * <p>
    545    * By returning a list, implementations of this method may be able to avoid
    546    * copying even when there are multiple backing arrays.
    547    *
    548    * @return a list of wrapped bytes
    549    */
    550   public abstract List<ByteBuffer> asReadOnlyByteBufferList();
    551 
    552   /**
    553    * Constructs a new {@code String} by decoding the bytes using the
    554    * specified charset.
    555    *
    556    * @param charsetName encode using this charset
    557    * @return new string
    558    * @throws UnsupportedEncodingException if charset isn't recognized
    559    */
    560   public abstract String toString(String charsetName)
    561       throws UnsupportedEncodingException;
    562 
    563   // =================================================================
    564   // UTF-8 decoding
    565 
    566   /**
    567    * Constructs a new {@code String} by decoding the bytes as UTF-8.
    568    *
    569    * @return new string using UTF-8 encoding
    570    */
    571   public String toStringUtf8() {
    572     try {
    573       return toString("UTF-8");
    574     } catch (UnsupportedEncodingException e) {
    575       throw new RuntimeException("UTF-8 not supported?", e);
    576     }
    577   }
    578 
    579   /**
    580    * Tells whether this {@code ByteString} represents a well-formed UTF-8
    581    * byte sequence, such that the original bytes can be converted to a
    582    * String object and then round tripped back to bytes without loss.
    583    *
    584    * <p>More precisely, returns {@code true} whenever: <pre> {@code
    585    * Arrays.equals(byteString.toByteArray(),
    586    *     new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8"))
    587    * }</pre>
    588    *
    589    * <p>This method returns {@code false} for "overlong" byte sequences,
    590    * as well as for 3-byte sequences that would map to a surrogate
    591    * character, in accordance with the restricted definition of UTF-8
    592    * introduced in Unicode 3.1.  Note that the UTF-8 decoder included in
    593    * Oracle's JDK has been modified to also reject "overlong" byte
    594    * sequences, but (as of 2011) still accepts 3-byte surrogate
    595    * character byte sequences.
    596    *
    597    * <p>See the Unicode Standard,</br>
    598    * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br>
    599    * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>.
    600    *
    601    * @return whether the bytes in this {@code ByteString} are a
    602    * well-formed UTF-8 byte sequence
    603    */
    604   public abstract boolean isValidUtf8();
    605 
    606   /**
    607    * Tells whether the given byte sequence is a well-formed, malformed, or
    608    * incomplete UTF-8 byte sequence.  This method accepts and returns a partial
    609    * state result, allowing the bytes for a complete UTF-8 byte sequence to be
    610    * composed from multiple {@code ByteString} segments.
    611    *
    612    * @param state either {@code 0} (if this is the initial decoding operation)
    613    *     or the value returned from a call to a partial decoding method for the
    614    *     previous bytes
    615    * @param offset offset of the first byte to check
    616    * @param length number of bytes to check
    617    *
    618    * @return {@code -1} if the partial byte sequence is definitely malformed,
    619    * {@code 0} if it is well-formed (no additional input needed), or, if the
    620    * byte sequence is "incomplete", i.e. apparently terminated in the middle of
    621    * a character, an opaque integer "state" value containing enough information
    622    * to decode the character when passed to a subsequent invocation of a
    623    * partial decoding method.
    624    */
    625   protected abstract int partialIsValidUtf8(int state, int offset, int length);
    626 
    627   // =================================================================
    628   // equals() and hashCode()
    629 
    630   @Override
    631   public abstract boolean equals(Object o);
    632 
    633   /**
    634    * Return a non-zero hashCode depending only on the sequence of bytes
    635    * in this ByteString.
    636    *
    637    * @return hashCode value for this object
    638    */
    639   @Override
    640   public abstract int hashCode();
    641 
    642   // =================================================================
    643   // Input stream
    644 
    645   /**
    646    * Creates an {@code InputStream} which can be used to read the bytes.
    647    * <p>
    648    * The {@link InputStream} returned by this method is guaranteed to be
    649    * completely non-blocking.  The method {@link InputStream#available()}
    650    * returns the number of bytes remaining in the stream. The methods
    651    * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)}
    652    * and {@link InputStream#skip(long)} will read/skip as many bytes as are
    653    * available.
    654    * <p>
    655    * The methods in the returned {@link InputStream} might <b>not</b> be
    656    * thread safe.
    657    *
    658    * @return an input stream that returns the bytes of this byte string.
    659    */
    660   public abstract InputStream newInput();
    661 
    662   /**
    663    * Creates a {@link CodedInputStream} which can be used to read the bytes.
    664    * Using this is often more efficient than creating a {@link CodedInputStream}
    665    * that wraps the result of {@link #newInput()}.
    666    *
    667    * @return stream based on wrapped data
    668    */
    669   public abstract CodedInputStream newCodedInput();
    670 
    671   // =================================================================
    672   // Output stream
    673 
    674   /**
    675    * Creates a new {@link Output} with the given initial capacity. Call {@link
    676    * Output#toByteString()} to create the {@code ByteString} instance.
    677    * <p>
    678    * A {@link ByteString.Output} offers the same functionality as a
    679    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
    680    * rather than a {@code byte} array.
    681    *
    682    * @param initialCapacity estimate of number of bytes to be written
    683    * @return {@code OutputStream} for building a {@code ByteString}
    684    */
    685   public static Output newOutput(int initialCapacity) {
    686     return new Output(initialCapacity);
    687   }
    688 
    689   /**
    690    * Creates a new {@link Output}. Call {@link Output#toByteString()} to create
    691    * the {@code ByteString} instance.
    692    * <p>
    693    * A {@link ByteString.Output} offers the same functionality as a
    694    * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString}
    695    * rather than a {@code byte array}.
    696    *
    697    * @return {@code OutputStream} for building a {@code ByteString}
    698    */
    699   public static Output newOutput() {
    700     return new Output(CONCATENATE_BY_COPY_SIZE);
    701   }
    702 
    703   /**
    704    * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to
    705    * create the {@code ByteString} instance.
    706    */
    707   public static final class Output extends OutputStream {
    708     // Implementation note.
    709     // The public methods of this class must be synchronized.  ByteStrings
    710     // are guaranteed to be immutable.  Without some sort of locking, it could
    711     // be possible for one thread to call toByteSring(), while another thread
    712     // is still modifying the underlying byte array.
    713 
    714     private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
    715     // argument passed by user, indicating initial capacity.
    716     private final int initialCapacity;
    717     // ByteStrings to be concatenated to create the result
    718     private final ArrayList<ByteString> flushedBuffers;
    719     // Total number of bytes in the ByteStrings of flushedBuffers
    720     private int flushedBuffersTotalBytes;
    721     // Current buffer to which we are writing
    722     private byte[] buffer;
    723     // Location in buffer[] to which we write the next byte.
    724     private int bufferPos;
    725 
    726     /**
    727      * Creates a new ByteString output stream with the specified
    728      * initial capacity.
    729      *
    730      * @param initialCapacity  the initial capacity of the output stream.
    731      */
    732     Output(int initialCapacity) {
    733       if (initialCapacity < 0) {
    734         throw new IllegalArgumentException("Buffer size < 0");
    735       }
    736       this.initialCapacity = initialCapacity;
    737       this.flushedBuffers = new ArrayList<ByteString>();
    738       this.buffer = new byte[initialCapacity];
    739     }
    740 
    741     @Override
    742     public synchronized void write(int b) {
    743       if (bufferPos == buffer.length) {
    744         flushFullBuffer(1);
    745       }
    746       buffer[bufferPos++] = (byte)b;
    747     }
    748 
    749     @Override
    750     public synchronized void write(byte[] b, int offset, int length)  {
    751       if (length <= buffer.length - bufferPos) {
    752         // The bytes can fit into the current buffer.
    753         System.arraycopy(b, offset, buffer, bufferPos, length);
    754         bufferPos += length;
    755       } else {
    756         // Use up the current buffer
    757         int copySize  = buffer.length - bufferPos;
    758         System.arraycopy(b, offset, buffer, bufferPos, copySize);
    759         offset += copySize;
    760         length -= copySize;
    761         // Flush the buffer, and get a new buffer at least big enough to cover
    762         // what we still need to output
    763         flushFullBuffer(length);
    764         System.arraycopy(b, offset, buffer, 0 /* count */, length);
    765         bufferPos = length;
    766       }
    767     }
    768 
    769     /**
    770      * Creates a byte string. Its size is the current size of this output
    771      * stream and its output has been copied to it.
    772      *
    773      * @return  the current contents of this output stream, as a byte string.
    774      */
    775     public synchronized ByteString toByteString() {
    776       flushLastBuffer();
    777       return ByteString.copyFrom(flushedBuffers);
    778     }
    779 
    780     /**
    781      * Writes the complete contents of this byte array output stream to
    782      * the specified output stream argument.
    783      *
    784      * @param out the output stream to which to write the data.
    785      * @throws IOException  if an I/O error occurs.
    786      */
    787     public void writeTo(OutputStream out) throws IOException {
    788       ByteString[] cachedFlushBuffers;
    789       byte[] cachedBuffer;
    790       int cachedBufferPos;
    791       synchronized (this) {
    792         // Copy the information we need into local variables so as to hold
    793         // the lock for as short a time as possible.
    794         cachedFlushBuffers =
    795             flushedBuffers.toArray(new ByteString[flushedBuffers.size()]);
    796         cachedBuffer = buffer;
    797         cachedBufferPos = bufferPos;
    798       }
    799       for (ByteString byteString : cachedFlushBuffers) {
    800         byteString.writeTo(out);
    801       }
    802 
    803       out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos));
    804     }
    805 
    806     /**
    807      * Returns the current size of the output stream.
    808      *
    809      * @return  the current size of the output stream
    810      */
    811     public synchronized int size() {
    812       return flushedBuffersTotalBytes + bufferPos;
    813     }
    814 
    815     /**
    816      * Resets this stream, so that all currently accumulated output in the
    817      * output stream is discarded. The output stream can be used again,
    818      * reusing the already allocated buffer space.
    819      */
    820     public synchronized void reset() {
    821       flushedBuffers.clear();
    822       flushedBuffersTotalBytes = 0;
    823       bufferPos = 0;
    824     }
    825 
    826     @Override
    827     public String toString() {
    828       return String.format("<ByteString.Output@%s size=%d>",
    829           Integer.toHexString(System.identityHashCode(this)), size());
    830     }
    831 
    832     /**
    833      * Internal function used by writers.  The current buffer is full, and the
    834      * writer needs a new buffer whose size is at least the specified minimum
    835      * size.
    836      */
    837     private void flushFullBuffer(int minSize)  {
    838       flushedBuffers.add(new LiteralByteString(buffer));
    839       flushedBuffersTotalBytes += buffer.length;
    840       // We want to increase our total capacity by 50%, but as a minimum,
    841       // the new buffer should also at least be >= minSize and
    842       // >= initial Capacity.
    843       int newSize = Math.max(initialCapacity,
    844           Math.max(minSize, flushedBuffersTotalBytes >>> 1));
    845       buffer = new byte[newSize];
    846       bufferPos = 0;
    847     }
    848 
    849     /**
    850      * Internal function used by {@link #toByteString()}. The current buffer may
    851      * or may not be full, but it needs to be flushed.
    852      */
    853     private void flushLastBuffer()  {
    854       if (bufferPos < buffer.length) {
    855         if (bufferPos > 0) {
    856           byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos);
    857           flushedBuffers.add(new LiteralByteString(bufferCopy));
    858         }
    859         // We reuse this buffer for further writes.
    860       } else {
    861         // Buffer is completely full.  Huzzah.
    862         flushedBuffers.add(new LiteralByteString(buffer));
    863         // 99% of the time, we're not going to use this OutputStream again.
    864         // We set buffer to an empty byte stream so that we're handling this
    865         // case without wasting space.  In the rare case that more writes
    866         // *do* occur, this empty buffer will be flushed and an appropriately
    867         // sized new buffer will be created.
    868         buffer = EMPTY_BYTE_ARRAY;
    869       }
    870       flushedBuffersTotalBytes += bufferPos;
    871       bufferPos = 0;
    872     }
    873   }
    874 
    875   /**
    876    * Constructs a new {@code ByteString} builder, which allows you to
    877    * efficiently construct a {@code ByteString} by writing to a {@link
    878    * CodedOutputStream}. Using this is much more efficient than calling {@code
    879    * newOutput()} and wrapping that in a {@code CodedOutputStream}.
    880    *
    881    * <p>This is package-private because it's a somewhat confusing interface.
    882    * Users can call {@link Message#toByteString()} instead of calling this
    883    * directly.
    884    *
    885    * @param size The target byte size of the {@code ByteString}.  You must write
    886    *     exactly this many bytes before building the result.
    887    * @return the builder
    888    */
    889   static CodedBuilder newCodedBuilder(int size) {
    890     return new CodedBuilder(size);
    891   }
    892 
    893   /** See {@link ByteString#newCodedBuilder(int)}. */
    894   static final class CodedBuilder {
    895     private final CodedOutputStream output;
    896     private final byte[] buffer;
    897 
    898     private CodedBuilder(int size) {
    899       buffer = new byte[size];
    900       output = CodedOutputStream.newInstance(buffer);
    901     }
    902 
    903     public ByteString build() {
    904       output.checkNoSpaceLeft();
    905 
    906       // We can be confident that the CodedOutputStream will not modify the
    907       // underlying bytes anymore because it already wrote all of them.  So,
    908       // no need to make a copy.
    909       return new LiteralByteString(buffer);
    910     }
    911 
    912     public CodedOutputStream getCodedOutput() {
    913       return output;
    914     }
    915   }
    916 
    917   // =================================================================
    918   // Methods {@link RopeByteString} needs on instances, which aren't part of the
    919   // public API.
    920 
    921   /**
    922    * Return the depth of the tree representing this {@code ByteString}, if any,
    923    * whose root is this node. If this is a leaf node, return 0.
    924    *
    925    * @return tree depth or zero
    926    */
    927   protected abstract int getTreeDepth();
    928 
    929   /**
    930    * Return {@code true} if this ByteString is literal (a leaf node) or a
    931    * flat-enough tree in the sense of {@link RopeByteString}.
    932    *
    933    * @return true if the tree is flat enough
    934    */
    935   protected abstract boolean isBalanced();
    936 
    937   /**
    938    * Return the cached hash code if available.
    939    *
    940    * @return value of cached hash code or 0 if not computed yet
    941    */
    942   protected abstract int peekCachedHashCode();
    943 
    944   /**
    945    * Compute the hash across the value bytes starting with the given hash, and
    946    * return the result.  This is used to compute the hash across strings
    947    * represented as a set of pieces by allowing the hash computation to be
    948    * continued from piece to piece.
    949    *
    950    * @param h starting hash value
    951    * @param offset offset into this value to start looking at data values
    952    * @param length number of data values to include in the hash computation
    953    * @return ending hash value
    954    */
    955   protected abstract int partialHash(int h, int offset, int length);
    956 
    957   @Override
    958   public String toString() {
    959     return String.format("<ByteString@%s size=%d>",
    960         Integer.toHexString(System.identityHashCode(this)), size());
    961   }
    962 }
    963