1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.ByteArrayOutputStream; 34 import java.io.IOException; 35 import java.io.InputStream; 36 import java.io.OutputStream; 37 import java.io.UnsupportedEncodingException; 38 import java.nio.ByteBuffer; 39 import java.util.ArrayList; 40 import java.util.Arrays; 41 import java.util.Collection; 42 import java.util.Iterator; 43 import java.util.List; 44 import java.util.NoSuchElementException; 45 46 /** 47 * Immutable sequence of bytes. Substring is supported by sharing the reference 48 * to the immutable underlying bytes, as with {@link String}. Concatenation is 49 * likewise supported without copying (long strings) by building a tree of 50 * pieces in {@link RopeByteString}. 51 * <p> 52 * Like {@link String}, the contents of a {@link ByteString} can never be 53 * observed to change, not even in the presence of a data race or incorrect 54 * API usage in the client code. 55 * 56 * @author crazybob (at) google.com Bob Lee 57 * @author kenton (at) google.com Kenton Varda 58 * @author carlanton (at) google.com Carl Haverl 59 * @author martinrb (at) google.com Martin Buchholz 60 */ 61 public abstract class ByteString implements Iterable<Byte> { 62 63 /** 64 * When two strings to be concatenated have a combined length shorter than 65 * this, we just copy their bytes on {@link #concat(ByteString)}. 66 * The trade-off is copy size versus the overhead of creating tree nodes 67 * in {@link RopeByteString}. 68 */ 69 static final int CONCATENATE_BY_COPY_SIZE = 128; 70 71 /** 72 * When copying an InputStream into a ByteString with .readFrom(), 73 * the chunks in the underlying rope start at 256 bytes, but double 74 * each iteration up to 8192 bytes. 75 */ 76 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 77 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 78 79 /** 80 * Empty {@code ByteString}. 81 */ 82 public static final ByteString EMPTY = new LiteralByteString(new byte[0]); 83 84 // This constructor is here to prevent subclassing outside of this package, 85 ByteString() {} 86 87 /** 88 * Gets the byte at the given index. This method should be used only for 89 * random access to individual bytes. To access bytes sequentially, use the 90 * {@link ByteIterator} returned by {@link #iterator()}, and call {@link 91 * #substring(int, int)} first if necessary. 92 * 93 * @param index index of byte 94 * @return the value 95 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 96 */ 97 public abstract byte byteAt(int index); 98 99 /** 100 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. 101 * To avoid auto-boxing, you may get the iterator manually and call 102 * {@link ByteIterator#nextByte()}. 103 * 104 * @return the iterator 105 */ 106 public abstract ByteIterator iterator(); 107 108 /** 109 * This interface extends {@code Iterator<Byte>}, so that we can return an 110 * unboxed {@code byte}. 111 */ 112 public interface ByteIterator extends Iterator<Byte> { 113 /** 114 * An alternative to {@link Iterator#next()} that returns an 115 * unboxed primitive {@code byte}. 116 * 117 * @return the next {@code byte} in the iteration 118 * @throws NoSuchElementException if the iteration has no more elements 119 */ 120 byte nextByte(); 121 } 122 123 /** 124 * Gets the number of bytes. 125 * 126 * @return size in bytes 127 */ 128 public abstract int size(); 129 130 /** 131 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 132 * 133 * @return true if this is zero bytes long 134 */ 135 public boolean isEmpty() { 136 return size() == 0; 137 } 138 139 // ================================================================= 140 // ByteString -> substring 141 142 /** 143 * Return the substring from {@code beginIndex}, inclusive, to the end of the 144 * string. 145 * 146 * @param beginIndex start at this index 147 * @return substring sharing underlying data 148 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or 149 * {@code beginIndex > size()}. 150 */ 151 public ByteString substring(int beginIndex) { 152 return substring(beginIndex, size()); 153 } 154 155 /** 156 * Return the substring from {@code beginIndex}, inclusive, to {@code 157 * endIndex}, exclusive. 158 * 159 * @param beginIndex start at this index 160 * @param endIndex the last character is the one before this index 161 * @return substring sharing underlying data 162 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, 163 * {@code endIndex > size()}, or {@code beginIndex > endIndex}. 164 */ 165 public abstract ByteString substring(int beginIndex, int endIndex); 166 167 /** 168 * Tests if this bytestring starts with the specified prefix. 169 * Similar to {@link String#startsWith(String)} 170 * 171 * @param prefix the prefix. 172 * @return <code>true</code> if the byte sequence represented by the 173 * argument is a prefix of the byte sequence represented by 174 * this string; <code>false</code> otherwise. 175 */ 176 public boolean startsWith(ByteString prefix) { 177 return size() >= prefix.size() && 178 substring(0, prefix.size()).equals(prefix); 179 } 180 181 // ================================================================= 182 // byte[] -> ByteString 183 184 /** 185 * Copies the given bytes into a {@code ByteString}. 186 * 187 * @param bytes source array 188 * @param offset offset in source array 189 * @param size number of bytes to copy 190 * @return new {@code ByteString} 191 */ 192 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 193 byte[] copy = new byte[size]; 194 System.arraycopy(bytes, offset, copy, 0, size); 195 return new LiteralByteString(copy); 196 } 197 198 /** 199 * Copies the given bytes into a {@code ByteString}. 200 * 201 * @param bytes to copy 202 * @return new {@code ByteString} 203 */ 204 public static ByteString copyFrom(byte[] bytes) { 205 return copyFrom(bytes, 0, bytes.length); 206 } 207 208 /** 209 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into 210 * a {@code ByteString}. 211 * 212 * @param bytes source buffer 213 * @param size number of bytes to copy 214 * @return new {@code ByteString} 215 */ 216 public static ByteString copyFrom(ByteBuffer bytes, int size) { 217 byte[] copy = new byte[size]; 218 bytes.get(copy); 219 return new LiteralByteString(copy); 220 } 221 222 /** 223 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into 224 * a {@code ByteString}. 225 * 226 * @param bytes sourceBuffer 227 * @return new {@code ByteString} 228 */ 229 public static ByteString copyFrom(ByteBuffer bytes) { 230 return copyFrom(bytes, bytes.remaining()); 231 } 232 233 /** 234 * Encodes {@code text} into a sequence of bytes using the named charset 235 * and returns the result as a {@code ByteString}. 236 * 237 * @param text source string 238 * @param charsetName encoding to use 239 * @return new {@code ByteString} 240 * @throws UnsupportedEncodingException if the encoding isn't found 241 */ 242 public static ByteString copyFrom(String text, String charsetName) 243 throws UnsupportedEncodingException { 244 return new LiteralByteString(text.getBytes(charsetName)); 245 } 246 247 /** 248 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the 249 * result as a {@code ByteString}. 250 * 251 * @param text source string 252 * @return new {@code ByteString} 253 */ 254 public static ByteString copyFromUtf8(String text) { 255 try { 256 return new LiteralByteString(text.getBytes("UTF-8")); 257 } catch (UnsupportedEncodingException e) { 258 throw new RuntimeException("UTF-8 not supported?", e); 259 } 260 } 261 262 // ================================================================= 263 // InputStream -> ByteString 264 265 /** 266 * Completely reads the given stream's bytes into a 267 * {@code ByteString}, blocking if necessary until all bytes are 268 * read through to the end of the stream. 269 * 270 * <b>Performance notes:</b> The returned {@code ByteString} is an 271 * immutable tree of byte arrays ("chunks") of the stream data. The 272 * first chunk is small, with subsequent chunks each being double 273 * the size, up to 8K. If the caller knows the precise length of 274 * the stream and wishes to avoid all unnecessary copies and 275 * allocations, consider using the two-argument version of this 276 * method, below. 277 * 278 * @param streamToDrain The source stream, which is read completely 279 * but not closed. 280 * @return A new {@code ByteString} which is made up of chunks of 281 * various sizes, depending on the behavior of the underlying 282 * stream. 283 * @throws IOException IOException is thrown if there is a problem 284 * reading the underlying stream. 285 */ 286 public static ByteString readFrom(InputStream streamToDrain) 287 throws IOException { 288 return readFrom( 289 streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 290 } 291 292 /** 293 * Completely reads the given stream's bytes into a 294 * {@code ByteString}, blocking if necessary until all bytes are 295 * read through to the end of the stream. 296 * 297 * <b>Performance notes:</b> The returned {@code ByteString} is an 298 * immutable tree of byte arrays ("chunks") of the stream data. The 299 * chunkSize parameter sets the size of these byte arrays. In 300 * particular, if the chunkSize is precisely the same as the length 301 * of the stream, unnecessary allocations and copies will be 302 * avoided. Otherwise, the chunks will be of the given size, except 303 * for the last chunk, which will be resized (via a reallocation and 304 * copy) to contain the remainder of the stream. 305 * 306 * @param streamToDrain The source stream, which is read completely 307 * but not closed. 308 * @param chunkSize The size of the chunks in which to read the 309 * stream. 310 * @return A new {@code ByteString} which is made up of chunks of 311 * the given size. 312 * @throws IOException IOException is thrown if there is a problem 313 * reading the underlying stream. 314 */ 315 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) 316 throws IOException { 317 return readFrom(streamToDrain, chunkSize, chunkSize); 318 } 319 320 // Helper method that takes the chunk size range as a parameter. 321 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, 322 int maxChunkSize) throws IOException { 323 Collection<ByteString> results = new ArrayList<ByteString>(); 324 325 // copy the inbound bytes into a list of chunks; the chunk size 326 // grows exponentially to support both short and long streams. 327 int chunkSize = minChunkSize; 328 while (true) { 329 ByteString chunk = readChunk(streamToDrain, chunkSize); 330 if (chunk == null) { 331 break; 332 } 333 results.add(chunk); 334 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 335 } 336 337 return ByteString.copyFrom(results); 338 } 339 340 /** 341 * Blocks until a chunk of the given size can be made from the 342 * stream, or EOF is reached. Calls read() repeatedly in case the 343 * given stream implementation doesn't completely fill the given 344 * buffer in one read() call. 345 * 346 * @return A chunk of the desired size, or else a chunk as large as 347 * was available when end of stream was reached. Returns null if the 348 * given stream had no more data in it. 349 */ 350 private static ByteString readChunk(InputStream in, final int chunkSize) 351 throws IOException { 352 final byte[] buf = new byte[chunkSize]; 353 int bytesRead = 0; 354 while (bytesRead < chunkSize) { 355 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 356 if (count == -1) { 357 break; 358 } 359 bytesRead += count; 360 } 361 362 if (bytesRead == 0) { 363 return null; 364 } else { 365 return ByteString.copyFrom(buf, 0, bytesRead); 366 } 367 } 368 369 // ================================================================= 370 // Multiple ByteStrings -> One ByteString 371 372 /** 373 * Concatenate the given {@code ByteString} to this one. Short concatenations, 374 * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are 375 * produced by copying the underlying bytes (as per Rope.java, <a 376 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 377 * BAP95 </a>. In general, the concatenate involves no copying. 378 * 379 * @param other string to concatenate 380 * @return a new {@code ByteString} instance 381 */ 382 public ByteString concat(ByteString other) { 383 int thisSize = size(); 384 int otherSize = other.size(); 385 if ((long) thisSize + otherSize >= Integer.MAX_VALUE) { 386 throw new IllegalArgumentException("ByteString would be too long: " + 387 thisSize + "+" + otherSize); 388 } 389 390 return RopeByteString.concatenate(this, other); 391 } 392 393 /** 394 * Concatenates all byte strings in the iterable and returns the result. 395 * This is designed to run in O(list size), not O(total bytes). 396 * 397 * <p>The returned {@code ByteString} is not necessarily a unique object. 398 * If the list is empty, the returned object is the singleton empty 399 * {@code ByteString}. If the list has only one element, that 400 * {@code ByteString} will be returned without copying. 401 * 402 * @param byteStrings strings to be concatenated 403 * @return new {@code ByteString} 404 */ 405 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 406 Collection<ByteString> collection; 407 if (!(byteStrings instanceof Collection)) { 408 collection = new ArrayList<ByteString>(); 409 for (ByteString byteString : byteStrings) { 410 collection.add(byteString); 411 } 412 } else { 413 collection = (Collection<ByteString>) byteStrings; 414 } 415 ByteString result; 416 if (collection.isEmpty()) { 417 result = EMPTY; 418 } else { 419 result = balancedConcat(collection.iterator(), collection.size()); 420 } 421 return result; 422 } 423 424 // Internal function used by copyFrom(Iterable<ByteString>). 425 // Create a balanced concatenation of the next "length" elements from the 426 // iterable. 427 private static ByteString balancedConcat(Iterator<ByteString> iterator, 428 int length) { 429 assert length >= 1; 430 ByteString result; 431 if (length == 1) { 432 result = iterator.next(); 433 } else { 434 int halfLength = length >>> 1; 435 ByteString left = balancedConcat(iterator, halfLength); 436 ByteString right = balancedConcat(iterator, length - halfLength); 437 result = left.concat(right); 438 } 439 return result; 440 } 441 442 // ================================================================= 443 // ByteString -> byte[] 444 445 /** 446 * Copies bytes into a buffer at the given offset. 447 * 448 * @param target buffer to copy into 449 * @param offset in the target buffer 450 * @throws IndexOutOfBoundsException if the offset is negative or too large 451 */ 452 public void copyTo(byte[] target, int offset) { 453 copyTo(target, 0, offset, size()); 454 } 455 456 /** 457 * Copies bytes into a buffer. 458 * 459 * @param target buffer to copy into 460 * @param sourceOffset offset within these bytes 461 * @param targetOffset offset within the target buffer 462 * @param numberToCopy number of bytes to copy 463 * @throws IndexOutOfBoundsException if an offset or size is negative or too 464 * large 465 */ 466 public void copyTo(byte[] target, int sourceOffset, int targetOffset, 467 int numberToCopy) { 468 if (sourceOffset < 0) { 469 throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); 470 } 471 if (targetOffset < 0) { 472 throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset); 473 } 474 if (numberToCopy < 0) { 475 throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy); 476 } 477 if (sourceOffset + numberToCopy > size()) { 478 throw new IndexOutOfBoundsException( 479 "Source end offset < 0: " + (sourceOffset + numberToCopy)); 480 } 481 if (targetOffset + numberToCopy > target.length) { 482 throw new IndexOutOfBoundsException( 483 "Target end offset < 0: " + (targetOffset + numberToCopy)); 484 } 485 if (numberToCopy > 0) { 486 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 487 } 488 } 489 490 /** 491 * Internal (package private) implementation of 492 * @link{#copyTo(byte[],int,int,int}. 493 * It assumes that all error checking has already been performed and that 494 * @code{numberToCopy > 0}. 495 */ 496 protected abstract void copyToInternal(byte[] target, int sourceOffset, 497 int targetOffset, int numberToCopy); 498 499 /** 500 * Copies bytes into a ByteBuffer. 501 * 502 * @param target ByteBuffer to copy into. 503 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 504 * @throws java.nio.BufferOverflowException if the {@code target}'s 505 * remaining() space is not large enough to hold the data. 506 */ 507 public abstract void copyTo(ByteBuffer target); 508 509 /** 510 * Copies bytes to a {@code byte[]}. 511 * 512 * @return copied bytes 513 */ 514 public byte[] toByteArray() { 515 int size = size(); 516 byte[] result = new byte[size]; 517 copyToInternal(result, 0, 0, size); 518 return result; 519 } 520 521 /** 522 * Writes the complete contents of this byte string to 523 * the specified output stream argument. 524 * 525 * @param out the output stream to which to write the data. 526 * @throws IOException if an I/O error occurs. 527 */ 528 public abstract void writeTo(OutputStream out) throws IOException; 529 530 /** 531 * Constructs a read-only {@code java.nio.ByteBuffer} whose content 532 * is equal to the contents of this byte string. 533 * The result uses the same backing array as the byte string, if possible. 534 * 535 * @return wrapped bytes 536 */ 537 public abstract ByteBuffer asReadOnlyByteBuffer(); 538 539 /** 540 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects 541 * such that the concatenation of their contents is equal to the contents 542 * of this byte string. The result uses the same backing arrays as the 543 * byte string. 544 * <p> 545 * By returning a list, implementations of this method may be able to avoid 546 * copying even when there are multiple backing arrays. 547 * 548 * @return a list of wrapped bytes 549 */ 550 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 551 552 /** 553 * Constructs a new {@code String} by decoding the bytes using the 554 * specified charset. 555 * 556 * @param charsetName encode using this charset 557 * @return new string 558 * @throws UnsupportedEncodingException if charset isn't recognized 559 */ 560 public abstract String toString(String charsetName) 561 throws UnsupportedEncodingException; 562 563 // ================================================================= 564 // UTF-8 decoding 565 566 /** 567 * Constructs a new {@code String} by decoding the bytes as UTF-8. 568 * 569 * @return new string using UTF-8 encoding 570 */ 571 public String toStringUtf8() { 572 try { 573 return toString("UTF-8"); 574 } catch (UnsupportedEncodingException e) { 575 throw new RuntimeException("UTF-8 not supported?", e); 576 } 577 } 578 579 /** 580 * Tells whether this {@code ByteString} represents a well-formed UTF-8 581 * byte sequence, such that the original bytes can be converted to a 582 * String object and then round tripped back to bytes without loss. 583 * 584 * <p>More precisely, returns {@code true} whenever: <pre> {@code 585 * Arrays.equals(byteString.toByteArray(), 586 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 587 * }</pre> 588 * 589 * <p>This method returns {@code false} for "overlong" byte sequences, 590 * as well as for 3-byte sequences that would map to a surrogate 591 * character, in accordance with the restricted definition of UTF-8 592 * introduced in Unicode 3.1. Note that the UTF-8 decoder included in 593 * Oracle's JDK has been modified to also reject "overlong" byte 594 * sequences, but (as of 2011) still accepts 3-byte surrogate 595 * character byte sequences. 596 * 597 * <p>See the Unicode Standard,</br> 598 * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> 599 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 600 * 601 * @return whether the bytes in this {@code ByteString} are a 602 * well-formed UTF-8 byte sequence 603 */ 604 public abstract boolean isValidUtf8(); 605 606 /** 607 * Tells whether the given byte sequence is a well-formed, malformed, or 608 * incomplete UTF-8 byte sequence. This method accepts and returns a partial 609 * state result, allowing the bytes for a complete UTF-8 byte sequence to be 610 * composed from multiple {@code ByteString} segments. 611 * 612 * @param state either {@code 0} (if this is the initial decoding operation) 613 * or the value returned from a call to a partial decoding method for the 614 * previous bytes 615 * @param offset offset of the first byte to check 616 * @param length number of bytes to check 617 * 618 * @return {@code -1} if the partial byte sequence is definitely malformed, 619 * {@code 0} if it is well-formed (no additional input needed), or, if the 620 * byte sequence is "incomplete", i.e. apparently terminated in the middle of 621 * a character, an opaque integer "state" value containing enough information 622 * to decode the character when passed to a subsequent invocation of a 623 * partial decoding method. 624 */ 625 protected abstract int partialIsValidUtf8(int state, int offset, int length); 626 627 // ================================================================= 628 // equals() and hashCode() 629 630 @Override 631 public abstract boolean equals(Object o); 632 633 /** 634 * Return a non-zero hashCode depending only on the sequence of bytes 635 * in this ByteString. 636 * 637 * @return hashCode value for this object 638 */ 639 @Override 640 public abstract int hashCode(); 641 642 // ================================================================= 643 // Input stream 644 645 /** 646 * Creates an {@code InputStream} which can be used to read the bytes. 647 * <p> 648 * The {@link InputStream} returned by this method is guaranteed to be 649 * completely non-blocking. The method {@link InputStream#available()} 650 * returns the number of bytes remaining in the stream. The methods 651 * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)} 652 * and {@link InputStream#skip(long)} will read/skip as many bytes as are 653 * available. 654 * <p> 655 * The methods in the returned {@link InputStream} might <b>not</b> be 656 * thread safe. 657 * 658 * @return an input stream that returns the bytes of this byte string. 659 */ 660 public abstract InputStream newInput(); 661 662 /** 663 * Creates a {@link CodedInputStream} which can be used to read the bytes. 664 * Using this is often more efficient than creating a {@link CodedInputStream} 665 * that wraps the result of {@link #newInput()}. 666 * 667 * @return stream based on wrapped data 668 */ 669 public abstract CodedInputStream newCodedInput(); 670 671 // ================================================================= 672 // Output stream 673 674 /** 675 * Creates a new {@link Output} with the given initial capacity. Call {@link 676 * Output#toByteString()} to create the {@code ByteString} instance. 677 * <p> 678 * A {@link ByteString.Output} offers the same functionality as a 679 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 680 * rather than a {@code byte} array. 681 * 682 * @param initialCapacity estimate of number of bytes to be written 683 * @return {@code OutputStream} for building a {@code ByteString} 684 */ 685 public static Output newOutput(int initialCapacity) { 686 return new Output(initialCapacity); 687 } 688 689 /** 690 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create 691 * the {@code ByteString} instance. 692 * <p> 693 * A {@link ByteString.Output} offers the same functionality as a 694 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 695 * rather than a {@code byte array}. 696 * 697 * @return {@code OutputStream} for building a {@code ByteString} 698 */ 699 public static Output newOutput() { 700 return new Output(CONCATENATE_BY_COPY_SIZE); 701 } 702 703 /** 704 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to 705 * create the {@code ByteString} instance. 706 */ 707 public static final class Output extends OutputStream { 708 // Implementation note. 709 // The public methods of this class must be synchronized. ByteStrings 710 // are guaranteed to be immutable. Without some sort of locking, it could 711 // be possible for one thread to call toByteSring(), while another thread 712 // is still modifying the underlying byte array. 713 714 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 715 // argument passed by user, indicating initial capacity. 716 private final int initialCapacity; 717 // ByteStrings to be concatenated to create the result 718 private final ArrayList<ByteString> flushedBuffers; 719 // Total number of bytes in the ByteStrings of flushedBuffers 720 private int flushedBuffersTotalBytes; 721 // Current buffer to which we are writing 722 private byte[] buffer; 723 // Location in buffer[] to which we write the next byte. 724 private int bufferPos; 725 726 /** 727 * Creates a new ByteString output stream with the specified 728 * initial capacity. 729 * 730 * @param initialCapacity the initial capacity of the output stream. 731 */ 732 Output(int initialCapacity) { 733 if (initialCapacity < 0) { 734 throw new IllegalArgumentException("Buffer size < 0"); 735 } 736 this.initialCapacity = initialCapacity; 737 this.flushedBuffers = new ArrayList<ByteString>(); 738 this.buffer = new byte[initialCapacity]; 739 } 740 741 @Override 742 public synchronized void write(int b) { 743 if (bufferPos == buffer.length) { 744 flushFullBuffer(1); 745 } 746 buffer[bufferPos++] = (byte)b; 747 } 748 749 @Override 750 public synchronized void write(byte[] b, int offset, int length) { 751 if (length <= buffer.length - bufferPos) { 752 // The bytes can fit into the current buffer. 753 System.arraycopy(b, offset, buffer, bufferPos, length); 754 bufferPos += length; 755 } else { 756 // Use up the current buffer 757 int copySize = buffer.length - bufferPos; 758 System.arraycopy(b, offset, buffer, bufferPos, copySize); 759 offset += copySize; 760 length -= copySize; 761 // Flush the buffer, and get a new buffer at least big enough to cover 762 // what we still need to output 763 flushFullBuffer(length); 764 System.arraycopy(b, offset, buffer, 0 /* count */, length); 765 bufferPos = length; 766 } 767 } 768 769 /** 770 * Creates a byte string. Its size is the current size of this output 771 * stream and its output has been copied to it. 772 * 773 * @return the current contents of this output stream, as a byte string. 774 */ 775 public synchronized ByteString toByteString() { 776 flushLastBuffer(); 777 return ByteString.copyFrom(flushedBuffers); 778 } 779 780 /** 781 * Writes the complete contents of this byte array output stream to 782 * the specified output stream argument. 783 * 784 * @param out the output stream to which to write the data. 785 * @throws IOException if an I/O error occurs. 786 */ 787 public void writeTo(OutputStream out) throws IOException { 788 ByteString[] cachedFlushBuffers; 789 byte[] cachedBuffer; 790 int cachedBufferPos; 791 synchronized (this) { 792 // Copy the information we need into local variables so as to hold 793 // the lock for as short a time as possible. 794 cachedFlushBuffers = 795 flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); 796 cachedBuffer = buffer; 797 cachedBufferPos = bufferPos; 798 } 799 for (ByteString byteString : cachedFlushBuffers) { 800 byteString.writeTo(out); 801 } 802 803 out.write(Arrays.copyOf(cachedBuffer, cachedBufferPos)); 804 } 805 806 /** 807 * Returns the current size of the output stream. 808 * 809 * @return the current size of the output stream 810 */ 811 public synchronized int size() { 812 return flushedBuffersTotalBytes + bufferPos; 813 } 814 815 /** 816 * Resets this stream, so that all currently accumulated output in the 817 * output stream is discarded. The output stream can be used again, 818 * reusing the already allocated buffer space. 819 */ 820 public synchronized void reset() { 821 flushedBuffers.clear(); 822 flushedBuffersTotalBytes = 0; 823 bufferPos = 0; 824 } 825 826 @Override 827 public String toString() { 828 return String.format("<ByteString.Output@%s size=%d>", 829 Integer.toHexString(System.identityHashCode(this)), size()); 830 } 831 832 /** 833 * Internal function used by writers. The current buffer is full, and the 834 * writer needs a new buffer whose size is at least the specified minimum 835 * size. 836 */ 837 private void flushFullBuffer(int minSize) { 838 flushedBuffers.add(new LiteralByteString(buffer)); 839 flushedBuffersTotalBytes += buffer.length; 840 // We want to increase our total capacity by 50%, but as a minimum, 841 // the new buffer should also at least be >= minSize and 842 // >= initial Capacity. 843 int newSize = Math.max(initialCapacity, 844 Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 845 buffer = new byte[newSize]; 846 bufferPos = 0; 847 } 848 849 /** 850 * Internal function used by {@link #toByteString()}. The current buffer may 851 * or may not be full, but it needs to be flushed. 852 */ 853 private void flushLastBuffer() { 854 if (bufferPos < buffer.length) { 855 if (bufferPos > 0) { 856 byte[] bufferCopy = Arrays.copyOf(buffer, bufferPos); 857 flushedBuffers.add(new LiteralByteString(bufferCopy)); 858 } 859 // We reuse this buffer for further writes. 860 } else { 861 // Buffer is completely full. Huzzah. 862 flushedBuffers.add(new LiteralByteString(buffer)); 863 // 99% of the time, we're not going to use this OutputStream again. 864 // We set buffer to an empty byte stream so that we're handling this 865 // case without wasting space. In the rare case that more writes 866 // *do* occur, this empty buffer will be flushed and an appropriately 867 // sized new buffer will be created. 868 buffer = EMPTY_BYTE_ARRAY; 869 } 870 flushedBuffersTotalBytes += bufferPos; 871 bufferPos = 0; 872 } 873 } 874 875 /** 876 * Constructs a new {@code ByteString} builder, which allows you to 877 * efficiently construct a {@code ByteString} by writing to a {@link 878 * CodedOutputStream}. Using this is much more efficient than calling {@code 879 * newOutput()} and wrapping that in a {@code CodedOutputStream}. 880 * 881 * <p>This is package-private because it's a somewhat confusing interface. 882 * Users can call {@link Message#toByteString()} instead of calling this 883 * directly. 884 * 885 * @param size The target byte size of the {@code ByteString}. You must write 886 * exactly this many bytes before building the result. 887 * @return the builder 888 */ 889 static CodedBuilder newCodedBuilder(int size) { 890 return new CodedBuilder(size); 891 } 892 893 /** See {@link ByteString#newCodedBuilder(int)}. */ 894 static final class CodedBuilder { 895 private final CodedOutputStream output; 896 private final byte[] buffer; 897 898 private CodedBuilder(int size) { 899 buffer = new byte[size]; 900 output = CodedOutputStream.newInstance(buffer); 901 } 902 903 public ByteString build() { 904 output.checkNoSpaceLeft(); 905 906 // We can be confident that the CodedOutputStream will not modify the 907 // underlying bytes anymore because it already wrote all of them. So, 908 // no need to make a copy. 909 return new LiteralByteString(buffer); 910 } 911 912 public CodedOutputStream getCodedOutput() { 913 return output; 914 } 915 } 916 917 // ================================================================= 918 // Methods {@link RopeByteString} needs on instances, which aren't part of the 919 // public API. 920 921 /** 922 * Return the depth of the tree representing this {@code ByteString}, if any, 923 * whose root is this node. If this is a leaf node, return 0. 924 * 925 * @return tree depth or zero 926 */ 927 protected abstract int getTreeDepth(); 928 929 /** 930 * Return {@code true} if this ByteString is literal (a leaf node) or a 931 * flat-enough tree in the sense of {@link RopeByteString}. 932 * 933 * @return true if the tree is flat enough 934 */ 935 protected abstract boolean isBalanced(); 936 937 /** 938 * Return the cached hash code if available. 939 * 940 * @return value of cached hash code or 0 if not computed yet 941 */ 942 protected abstract int peekCachedHashCode(); 943 944 /** 945 * Compute the hash across the value bytes starting with the given hash, and 946 * return the result. This is used to compute the hash across strings 947 * represented as a set of pieces by allowing the hash computation to be 948 * continued from piece to piece. 949 * 950 * @param h starting hash value 951 * @param offset offset into this value to start looking at data values 952 * @param length number of data values to include in the hash computation 953 * @return ending hash value 954 */ 955 protected abstract int partialHash(int h, int offset, int length); 956 957 @Override 958 public String toString() { 959 return String.format("<ByteString@%s size=%d>", 960 Integer.toHexString(System.identityHashCode(this)), size()); 961 } 962 } 963