1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import java.io.ByteArrayOutputStream; 34 import java.io.IOException; 35 import java.io.InputStream; 36 import java.io.OutputStream; 37 import java.io.UnsupportedEncodingException; 38 import java.nio.ByteBuffer; 39 import java.util.ArrayList; 40 import java.util.Collection; 41 import java.util.Iterator; 42 import java.util.List; 43 import java.util.NoSuchElementException; 44 45 /** 46 * Immutable sequence of bytes. Substring is supported by sharing the reference 47 * to the immutable underlying bytes, as with {@link String}. Concatenation is 48 * likewise supported without copying (long strings) by building a tree of 49 * pieces in {@link RopeByteString}. 50 * <p> 51 * Like {@link String}, the contents of a {@link ByteString} can never be 52 * observed to change, not even in the presence of a data race or incorrect 53 * API usage in the client code. 54 * 55 * @author crazybob (at) google.com Bob Lee 56 * @author kenton (at) google.com Kenton Varda 57 * @author carlanton (at) google.com Carl Haverl 58 * @author martinrb (at) google.com Martin Buchholz 59 */ 60 public abstract class ByteString implements Iterable<Byte> { 61 62 /** 63 * When two strings to be concatenated have a combined length shorter than 64 * this, we just copy their bytes on {@link #concat(ByteString)}. 65 * The trade-off is copy size versus the overhead of creating tree nodes 66 * in {@link RopeByteString}. 67 */ 68 static final int CONCATENATE_BY_COPY_SIZE = 128; 69 70 /** 71 * When copying an InputStream into a ByteString with .readFrom(), 72 * the chunks in the underlying rope start at 256 bytes, but double 73 * each iteration up to 8192 bytes. 74 */ 75 static final int MIN_READ_FROM_CHUNK_SIZE = 0x100; // 256b 76 static final int MAX_READ_FROM_CHUNK_SIZE = 0x2000; // 8k 77 78 /** 79 * Empty {@code ByteString}. 80 */ 81 public static final ByteString EMPTY = new LiteralByteString(new byte[0]); 82 83 // This constructor is here to prevent subclassing outside of this package, 84 ByteString() {} 85 86 /** 87 * Gets the byte at the given index. This method should be used only for 88 * random access to individual bytes. To access bytes sequentially, use the 89 * {@link ByteIterator} returned by {@link #iterator()}, and call {@link 90 * #substring(int, int)} first if necessary. 91 * 92 * @param index index of byte 93 * @return the value 94 * @throws ArrayIndexOutOfBoundsException {@code index} is < 0 or >= size 95 */ 96 public abstract byte byteAt(int index); 97 98 /** 99 * Return a {@link ByteString.ByteIterator} over the bytes in the ByteString. 100 * To avoid auto-boxing, you may get the iterator manually and call 101 * {@link ByteIterator#nextByte()}. 102 * 103 * @return the iterator 104 */ 105 public abstract ByteIterator iterator(); 106 107 /** 108 * This interface extends {@code Iterator<Byte>}, so that we can return an 109 * unboxed {@code byte}. 110 */ 111 public interface ByteIterator extends Iterator<Byte> { 112 /** 113 * An alternative to {@link Iterator#next()} that returns an 114 * unboxed primitive {@code byte}. 115 * 116 * @return the next {@code byte} in the iteration 117 * @throws NoSuchElementException if the iteration has no more elements 118 */ 119 byte nextByte(); 120 } 121 122 /** 123 * Gets the number of bytes. 124 * 125 * @return size in bytes 126 */ 127 public abstract int size(); 128 129 /** 130 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 131 * 132 * @return true if this is zero bytes long 133 */ 134 public boolean isEmpty() { 135 return size() == 0; 136 } 137 138 // ================================================================= 139 // ByteString -> substring 140 141 /** 142 * Return the substring from {@code beginIndex}, inclusive, to the end of the 143 * string. 144 * 145 * @param beginIndex start at this index 146 * @return substring sharing underlying data 147 * @throws IndexOutOfBoundsException if {@code beginIndex < 0} or 148 * {@code beginIndex > size()}. 149 */ 150 public ByteString substring(int beginIndex) { 151 return substring(beginIndex, size()); 152 } 153 154 /** 155 * Return the substring from {@code beginIndex}, inclusive, to {@code 156 * endIndex}, exclusive. 157 * 158 * @param beginIndex start at this index 159 * @param endIndex the last character is the one before this index 160 * @return substring sharing underlying data 161 * @throws IndexOutOfBoundsException if {@code beginIndex < 0}, 162 * {@code endIndex > size()}, or {@code beginIndex > endIndex}. 163 */ 164 public abstract ByteString substring(int beginIndex, int endIndex); 165 166 /** 167 * Tests if this bytestring starts with the specified prefix. 168 * Similar to {@link String#startsWith(String)} 169 * 170 * @param prefix the prefix. 171 * @return <code>true</code> if the byte sequence represented by the 172 * argument is a prefix of the byte sequence represented by 173 * this string; <code>false</code> otherwise. 174 */ 175 public boolean startsWith(ByteString prefix) { 176 return size() >= prefix.size() && 177 substring(0, prefix.size()).equals(prefix); 178 } 179 180 // ================================================================= 181 // byte[] -> ByteString 182 183 /** 184 * Copies the given bytes into a {@code ByteString}. 185 * 186 * @param bytes source array 187 * @param offset offset in source array 188 * @param size number of bytes to copy 189 * @return new {@code ByteString} 190 */ 191 public static ByteString copyFrom(byte[] bytes, int offset, int size) { 192 byte[] copy = new byte[size]; 193 System.arraycopy(bytes, offset, copy, 0, size); 194 return new LiteralByteString(copy); 195 } 196 197 /** 198 * Copies the given bytes into a {@code ByteString}. 199 * 200 * @param bytes to copy 201 * @return new {@code ByteString} 202 */ 203 public static ByteString copyFrom(byte[] bytes) { 204 return copyFrom(bytes, 0, bytes.length); 205 } 206 207 /** 208 * Copies the next {@code size} bytes from a {@code java.nio.ByteBuffer} into 209 * a {@code ByteString}. 210 * 211 * @param bytes source buffer 212 * @param size number of bytes to copy 213 * @return new {@code ByteString} 214 */ 215 public static ByteString copyFrom(ByteBuffer bytes, int size) { 216 byte[] copy = new byte[size]; 217 bytes.get(copy); 218 return new LiteralByteString(copy); 219 } 220 221 /** 222 * Copies the remaining bytes from a {@code java.nio.ByteBuffer} into 223 * a {@code ByteString}. 224 * 225 * @param bytes sourceBuffer 226 * @return new {@code ByteString} 227 */ 228 public static ByteString copyFrom(ByteBuffer bytes) { 229 return copyFrom(bytes, bytes.remaining()); 230 } 231 232 /** 233 * Encodes {@code text} into a sequence of bytes using the named charset 234 * and returns the result as a {@code ByteString}. 235 * 236 * @param text source string 237 * @param charsetName encoding to use 238 * @return new {@code ByteString} 239 * @throws UnsupportedEncodingException if the encoding isn't found 240 */ 241 public static ByteString copyFrom(String text, String charsetName) 242 throws UnsupportedEncodingException { 243 return new LiteralByteString(text.getBytes(charsetName)); 244 } 245 246 /** 247 * Encodes {@code text} into a sequence of UTF-8 bytes and returns the 248 * result as a {@code ByteString}. 249 * 250 * @param text source string 251 * @return new {@code ByteString} 252 */ 253 public static ByteString copyFromUtf8(String text) { 254 try { 255 return new LiteralByteString(text.getBytes("UTF-8")); 256 } catch (UnsupportedEncodingException e) { 257 throw new RuntimeException("UTF-8 not supported?", e); 258 } 259 } 260 261 // ================================================================= 262 // InputStream -> ByteString 263 264 /** 265 * Completely reads the given stream's bytes into a 266 * {@code ByteString}, blocking if necessary until all bytes are 267 * read through to the end of the stream. 268 * 269 * <b>Performance notes:</b> The returned {@code ByteString} is an 270 * immutable tree of byte arrays ("chunks") of the stream data. The 271 * first chunk is small, with subsequent chunks each being double 272 * the size, up to 8K. If the caller knows the precise length of 273 * the stream and wishes to avoid all unnecessary copies and 274 * allocations, consider using the two-argument version of this 275 * method, below. 276 * 277 * @param streamToDrain The source stream, which is read completely 278 * but not closed. 279 * @return A new {@code ByteString} which is made up of chunks of 280 * various sizes, depending on the behavior of the underlying 281 * stream. 282 * @throws IOException IOException is thrown if there is a problem 283 * reading the underlying stream. 284 */ 285 public static ByteString readFrom(InputStream streamToDrain) 286 throws IOException { 287 return readFrom( 288 streamToDrain, MIN_READ_FROM_CHUNK_SIZE, MAX_READ_FROM_CHUNK_SIZE); 289 } 290 291 /** 292 * Completely reads the given stream's bytes into a 293 * {@code ByteString}, blocking if necessary until all bytes are 294 * read through to the end of the stream. 295 * 296 * <b>Performance notes:</b> The returned {@code ByteString} is an 297 * immutable tree of byte arrays ("chunks") of the stream data. The 298 * chunkSize parameter sets the size of these byte arrays. In 299 * particular, if the chunkSize is precisely the same as the length 300 * of the stream, unnecessary allocations and copies will be 301 * avoided. Otherwise, the chunks will be of the given size, except 302 * for the last chunk, which will be resized (via a reallocation and 303 * copy) to contain the remainder of the stream. 304 * 305 * @param streamToDrain The source stream, which is read completely 306 * but not closed. 307 * @param chunkSize The size of the chunks in which to read the 308 * stream. 309 * @return A new {@code ByteString} which is made up of chunks of 310 * the given size. 311 * @throws IOException IOException is thrown if there is a problem 312 * reading the underlying stream. 313 */ 314 public static ByteString readFrom(InputStream streamToDrain, int chunkSize) 315 throws IOException { 316 return readFrom(streamToDrain, chunkSize, chunkSize); 317 } 318 319 // Helper method that takes the chunk size range as a parameter. 320 public static ByteString readFrom(InputStream streamToDrain, int minChunkSize, 321 int maxChunkSize) throws IOException { 322 Collection<ByteString> results = new ArrayList<ByteString>(); 323 324 // copy the inbound bytes into a list of chunks; the chunk size 325 // grows exponentially to support both short and long streams. 326 int chunkSize = minChunkSize; 327 while (true) { 328 ByteString chunk = readChunk(streamToDrain, chunkSize); 329 if (chunk == null) { 330 break; 331 } 332 results.add(chunk); 333 chunkSize = Math.min(chunkSize * 2, maxChunkSize); 334 } 335 336 return ByteString.copyFrom(results); 337 } 338 339 /** 340 * Blocks until a chunk of the given size can be made from the 341 * stream, or EOF is reached. Calls read() repeatedly in case the 342 * given stream implementation doesn't completely fill the given 343 * buffer in one read() call. 344 * 345 * @return A chunk of the desired size, or else a chunk as large as 346 * was available when end of stream was reached. Returns null if the 347 * given stream had no more data in it. 348 */ 349 private static ByteString readChunk(InputStream in, final int chunkSize) 350 throws IOException { 351 final byte[] buf = new byte[chunkSize]; 352 int bytesRead = 0; 353 while (bytesRead < chunkSize) { 354 final int count = in.read(buf, bytesRead, chunkSize - bytesRead); 355 if (count == -1) { 356 break; 357 } 358 bytesRead += count; 359 } 360 361 if (bytesRead == 0) { 362 return null; 363 } else { 364 return ByteString.copyFrom(buf, 0, bytesRead); 365 } 366 } 367 368 // ================================================================= 369 // Multiple ByteStrings -> One ByteString 370 371 /** 372 * Concatenate the given {@code ByteString} to this one. Short concatenations, 373 * of total size smaller than {@link ByteString#CONCATENATE_BY_COPY_SIZE}, are 374 * produced by copying the underlying bytes (as per Rope.java, <a 375 * href="http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf"> 376 * BAP95 </a>. In general, the concatenate involves no copying. 377 * 378 * @param other string to concatenate 379 * @return a new {@code ByteString} instance 380 */ 381 public ByteString concat(ByteString other) { 382 int thisSize = size(); 383 int otherSize = other.size(); 384 if ((long) thisSize + otherSize >= Integer.MAX_VALUE) { 385 throw new IllegalArgumentException("ByteString would be too long: " + 386 thisSize + "+" + otherSize); 387 } 388 389 return RopeByteString.concatenate(this, other); 390 } 391 392 /** 393 * Concatenates all byte strings in the iterable and returns the result. 394 * This is designed to run in O(list size), not O(total bytes). 395 * 396 * <p>The returned {@code ByteString} is not necessarily a unique object. 397 * If the list is empty, the returned object is the singleton empty 398 * {@code ByteString}. If the list has only one element, that 399 * {@code ByteString} will be returned without copying. 400 * 401 * @param byteStrings strings to be concatenated 402 * @return new {@code ByteString} 403 */ 404 public static ByteString copyFrom(Iterable<ByteString> byteStrings) { 405 Collection<ByteString> collection; 406 if (!(byteStrings instanceof Collection)) { 407 collection = new ArrayList<ByteString>(); 408 for (ByteString byteString : byteStrings) { 409 collection.add(byteString); 410 } 411 } else { 412 collection = (Collection<ByteString>) byteStrings; 413 } 414 ByteString result; 415 if (collection.isEmpty()) { 416 result = EMPTY; 417 } else { 418 result = balancedConcat(collection.iterator(), collection.size()); 419 } 420 return result; 421 } 422 423 // Internal function used by copyFrom(Iterable<ByteString>). 424 // Create a balanced concatenation of the next "length" elements from the 425 // iterable. 426 private static ByteString balancedConcat(Iterator<ByteString> iterator, 427 int length) { 428 assert length >= 1; 429 ByteString result; 430 if (length == 1) { 431 result = iterator.next(); 432 } else { 433 int halfLength = length >>> 1; 434 ByteString left = balancedConcat(iterator, halfLength); 435 ByteString right = balancedConcat(iterator, length - halfLength); 436 result = left.concat(right); 437 } 438 return result; 439 } 440 441 // ================================================================= 442 // ByteString -> byte[] 443 444 /** 445 * Copies bytes into a buffer at the given offset. 446 * 447 * @param target buffer to copy into 448 * @param offset in the target buffer 449 * @throws IndexOutOfBoundsException if the offset is negative or too large 450 */ 451 public void copyTo(byte[] target, int offset) { 452 copyTo(target, 0, offset, size()); 453 } 454 455 /** 456 * Copies bytes into a buffer. 457 * 458 * @param target buffer to copy into 459 * @param sourceOffset offset within these bytes 460 * @param targetOffset offset within the target buffer 461 * @param numberToCopy number of bytes to copy 462 * @throws IndexOutOfBoundsException if an offset or size is negative or too 463 * large 464 */ 465 public void copyTo(byte[] target, int sourceOffset, int targetOffset, 466 int numberToCopy) { 467 if (sourceOffset < 0) { 468 throw new IndexOutOfBoundsException("Source offset < 0: " + sourceOffset); 469 } 470 if (targetOffset < 0) { 471 throw new IndexOutOfBoundsException("Target offset < 0: " + targetOffset); 472 } 473 if (numberToCopy < 0) { 474 throw new IndexOutOfBoundsException("Length < 0: " + numberToCopy); 475 } 476 if (sourceOffset + numberToCopy > size()) { 477 throw new IndexOutOfBoundsException( 478 "Source end offset < 0: " + (sourceOffset + numberToCopy)); 479 } 480 if (targetOffset + numberToCopy > target.length) { 481 throw new IndexOutOfBoundsException( 482 "Target end offset < 0: " + (targetOffset + numberToCopy)); 483 } 484 if (numberToCopy > 0) { 485 copyToInternal(target, sourceOffset, targetOffset, numberToCopy); 486 } 487 } 488 489 /** 490 * Internal (package private) implementation of 491 * @link{#copyTo(byte[],int,int,int}. 492 * It assumes that all error checking has already been performed and that 493 * @code{numberToCopy > 0}. 494 */ 495 protected abstract void copyToInternal(byte[] target, int sourceOffset, 496 int targetOffset, int numberToCopy); 497 498 /** 499 * Copies bytes into a ByteBuffer. 500 * 501 * @param target ByteBuffer to copy into. 502 * @throws java.nio.ReadOnlyBufferException if the {@code target} is read-only 503 * @throws java.nio.BufferOverflowException if the {@code target}'s 504 * remaining() space is not large enough to hold the data. 505 */ 506 public abstract void copyTo(ByteBuffer target); 507 508 /** 509 * Copies bytes to a {@code byte[]}. 510 * 511 * @return copied bytes 512 */ 513 public byte[] toByteArray() { 514 int size = size(); 515 byte[] result = new byte[size]; 516 copyToInternal(result, 0, 0, size); 517 return result; 518 } 519 520 /** 521 * Writes the complete contents of this byte string to 522 * the specified output stream argument. 523 * 524 * @param out the output stream to which to write the data. 525 * @throws IOException if an I/O error occurs. 526 */ 527 public abstract void writeTo(OutputStream out) throws IOException; 528 529 /** 530 * Constructs a read-only {@code java.nio.ByteBuffer} whose content 531 * is equal to the contents of this byte string. 532 * The result uses the same backing array as the byte string, if possible. 533 * 534 * @return wrapped bytes 535 */ 536 public abstract ByteBuffer asReadOnlyByteBuffer(); 537 538 /** 539 * Constructs a list of read-only {@code java.nio.ByteBuffer} objects 540 * such that the concatenation of their contents is equal to the contents 541 * of this byte string. The result uses the same backing arrays as the 542 * byte string. 543 * <p> 544 * By returning a list, implementations of this method may be able to avoid 545 * copying even when there are multiple backing arrays. 546 * 547 * @return a list of wrapped bytes 548 */ 549 public abstract List<ByteBuffer> asReadOnlyByteBufferList(); 550 551 /** 552 * Constructs a new {@code String} by decoding the bytes using the 553 * specified charset. 554 * 555 * @param charsetName encode using this charset 556 * @return new string 557 * @throws UnsupportedEncodingException if charset isn't recognized 558 */ 559 public abstract String toString(String charsetName) 560 throws UnsupportedEncodingException; 561 562 // ================================================================= 563 // UTF-8 decoding 564 565 /** 566 * Constructs a new {@code String} by decoding the bytes as UTF-8. 567 * 568 * @return new string using UTF-8 encoding 569 */ 570 public String toStringUtf8() { 571 try { 572 return toString("UTF-8"); 573 } catch (UnsupportedEncodingException e) { 574 throw new RuntimeException("UTF-8 not supported?", e); 575 } 576 } 577 578 /** 579 * Tells whether this {@code ByteString} represents a well-formed UTF-8 580 * byte sequence, such that the original bytes can be converted to a 581 * String object and then round tripped back to bytes without loss. 582 * 583 * <p>More precisely, returns {@code true} whenever: <pre> {@code 584 * Arrays.equals(byteString.toByteArray(), 585 * new String(byteString.toByteArray(), "UTF-8").getBytes("UTF-8")) 586 * }</pre> 587 * 588 * <p>This method returns {@code false} for "overlong" byte sequences, 589 * as well as for 3-byte sequences that would map to a surrogate 590 * character, in accordance with the restricted definition of UTF-8 591 * introduced in Unicode 3.1. Note that the UTF-8 decoder included in 592 * Oracle's JDK has been modified to also reject "overlong" byte 593 * sequences, but (as of 2011) still accepts 3-byte surrogate 594 * character byte sequences. 595 * 596 * <p>See the Unicode Standard,</br> 597 * Table 3-6. <em>UTF-8 Bit Distribution</em>,</br> 598 * Table 3-7. <em>Well Formed UTF-8 Byte Sequences</em>. 599 * 600 * @return whether the bytes in this {@code ByteString} are a 601 * well-formed UTF-8 byte sequence 602 */ 603 public abstract boolean isValidUtf8(); 604 605 /** 606 * Tells whether the given byte sequence is a well-formed, malformed, or 607 * incomplete UTF-8 byte sequence. This method accepts and returns a partial 608 * state result, allowing the bytes for a complete UTF-8 byte sequence to be 609 * composed from multiple {@code ByteString} segments. 610 * 611 * @param state either {@code 0} (if this is the initial decoding operation) 612 * or the value returned from a call to a partial decoding method for the 613 * previous bytes 614 * @param offset offset of the first byte to check 615 * @param length number of bytes to check 616 * 617 * @return {@code -1} if the partial byte sequence is definitely malformed, 618 * {@code 0} if it is well-formed (no additional input needed), or, if the 619 * byte sequence is "incomplete", i.e. apparently terminated in the middle of 620 * a character, an opaque integer "state" value containing enough information 621 * to decode the character when passed to a subsequent invocation of a 622 * partial decoding method. 623 */ 624 protected abstract int partialIsValidUtf8(int state, int offset, int length); 625 626 // ================================================================= 627 // equals() and hashCode() 628 629 @Override 630 public abstract boolean equals(Object o); 631 632 /** 633 * Return a non-zero hashCode depending only on the sequence of bytes 634 * in this ByteString. 635 * 636 * @return hashCode value for this object 637 */ 638 @Override 639 public abstract int hashCode(); 640 641 // ================================================================= 642 // Input stream 643 644 /** 645 * Creates an {@code InputStream} which can be used to read the bytes. 646 * <p> 647 * The {@link InputStream} returned by this method is guaranteed to be 648 * completely non-blocking. The method {@link InputStream#available()} 649 * returns the number of bytes remaining in the stream. The methods 650 * {@link InputStream#read(byte[]), {@link InputStream#read(byte[],int,int)} 651 * and {@link InputStream#skip(long)} will read/skip as many bytes as are 652 * available. 653 * <p> 654 * The methods in the returned {@link InputStream} might <b>not</b> be 655 * thread safe. 656 * 657 * @return an input stream that returns the bytes of this byte string. 658 */ 659 public abstract InputStream newInput(); 660 661 /** 662 * Creates a {@link CodedInputStream} which can be used to read the bytes. 663 * Using this is often more efficient than creating a {@link CodedInputStream} 664 * that wraps the result of {@link #newInput()}. 665 * 666 * @return stream based on wrapped data 667 */ 668 public abstract CodedInputStream newCodedInput(); 669 670 // ================================================================= 671 // Output stream 672 673 /** 674 * Creates a new {@link Output} with the given initial capacity. Call {@link 675 * Output#toByteString()} to create the {@code ByteString} instance. 676 * <p> 677 * A {@link ByteString.Output} offers the same functionality as a 678 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 679 * rather than a {@code byte} array. 680 * 681 * @param initialCapacity estimate of number of bytes to be written 682 * @return {@code OutputStream} for building a {@code ByteString} 683 */ 684 public static Output newOutput(int initialCapacity) { 685 return new Output(initialCapacity); 686 } 687 688 /** 689 * Creates a new {@link Output}. Call {@link Output#toByteString()} to create 690 * the {@code ByteString} instance. 691 * <p> 692 * A {@link ByteString.Output} offers the same functionality as a 693 * {@link ByteArrayOutputStream}, except that it returns a {@link ByteString} 694 * rather than a {@code byte array}. 695 * 696 * @return {@code OutputStream} for building a {@code ByteString} 697 */ 698 public static Output newOutput() { 699 return new Output(CONCATENATE_BY_COPY_SIZE); 700 } 701 702 /** 703 * Outputs to a {@code ByteString} instance. Call {@link #toByteString()} to 704 * create the {@code ByteString} instance. 705 */ 706 public static final class Output extends OutputStream { 707 // Implementation note. 708 // The public methods of this class must be synchronized. ByteStrings 709 // are guaranteed to be immutable. Without some sort of locking, it could 710 // be possible for one thread to call toByteSring(), while another thread 711 // is still modifying the underlying byte array. 712 713 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; 714 // argument passed by user, indicating initial capacity. 715 private final int initialCapacity; 716 // ByteStrings to be concatenated to create the result 717 private final ArrayList<ByteString> flushedBuffers; 718 // Total number of bytes in the ByteStrings of flushedBuffers 719 private int flushedBuffersTotalBytes; 720 // Current buffer to which we are writing 721 private byte[] buffer; 722 // Location in buffer[] to which we write the next byte. 723 private int bufferPos; 724 725 /** 726 * Creates a new ByteString output stream with the specified 727 * initial capacity. 728 * 729 * @param initialCapacity the initial capacity of the output stream. 730 */ 731 Output(int initialCapacity) { 732 if (initialCapacity < 0) { 733 throw new IllegalArgumentException("Buffer size < 0"); 734 } 735 this.initialCapacity = initialCapacity; 736 this.flushedBuffers = new ArrayList<ByteString>(); 737 this.buffer = new byte[initialCapacity]; 738 } 739 740 @Override 741 public synchronized void write(int b) { 742 if (bufferPos == buffer.length) { 743 flushFullBuffer(1); 744 } 745 buffer[bufferPos++] = (byte)b; 746 } 747 748 @Override 749 public synchronized void write(byte[] b, int offset, int length) { 750 if (length <= buffer.length - bufferPos) { 751 // The bytes can fit into the current buffer. 752 System.arraycopy(b, offset, buffer, bufferPos, length); 753 bufferPos += length; 754 } else { 755 // Use up the current buffer 756 int copySize = buffer.length - bufferPos; 757 System.arraycopy(b, offset, buffer, bufferPos, copySize); 758 offset += copySize; 759 length -= copySize; 760 // Flush the buffer, and get a new buffer at least big enough to cover 761 // what we still need to output 762 flushFullBuffer(length); 763 System.arraycopy(b, offset, buffer, 0 /* count */, length); 764 bufferPos = length; 765 } 766 } 767 768 /** 769 * Creates a byte string. Its size is the current size of this output 770 * stream and its output has been copied to it. 771 * 772 * @return the current contents of this output stream, as a byte string. 773 */ 774 public synchronized ByteString toByteString() { 775 flushLastBuffer(); 776 return ByteString.copyFrom(flushedBuffers); 777 } 778 779 /** 780 * Implement java.util.Arrays.copyOf() for jdk 1.5. 781 */ 782 private byte[] copyArray(byte[] buffer, int length) { 783 byte[] result = new byte[length]; 784 System.arraycopy(buffer, 0, result, 0, Math.min(buffer.length, length)); 785 return result; 786 } 787 788 /** 789 * Writes the complete contents of this byte array output stream to 790 * the specified output stream argument. 791 * 792 * @param out the output stream to which to write the data. 793 * @throws IOException if an I/O error occurs. 794 */ 795 public void writeTo(OutputStream out) throws IOException { 796 ByteString[] cachedFlushBuffers; 797 byte[] cachedBuffer; 798 int cachedBufferPos; 799 synchronized (this) { 800 // Copy the information we need into local variables so as to hold 801 // the lock for as short a time as possible. 802 cachedFlushBuffers = 803 flushedBuffers.toArray(new ByteString[flushedBuffers.size()]); 804 cachedBuffer = buffer; 805 cachedBufferPos = bufferPos; 806 } 807 for (ByteString byteString : cachedFlushBuffers) { 808 byteString.writeTo(out); 809 } 810 811 out.write(copyArray(cachedBuffer, cachedBufferPos)); 812 } 813 814 /** 815 * Returns the current size of the output stream. 816 * 817 * @return the current size of the output stream 818 */ 819 public synchronized int size() { 820 return flushedBuffersTotalBytes + bufferPos; 821 } 822 823 /** 824 * Resets this stream, so that all currently accumulated output in the 825 * output stream is discarded. The output stream can be used again, 826 * reusing the already allocated buffer space. 827 */ 828 public synchronized void reset() { 829 flushedBuffers.clear(); 830 flushedBuffersTotalBytes = 0; 831 bufferPos = 0; 832 } 833 834 @Override 835 public String toString() { 836 return String.format("<ByteString.Output@%s size=%d>", 837 Integer.toHexString(System.identityHashCode(this)), size()); 838 } 839 840 /** 841 * Internal function used by writers. The current buffer is full, and the 842 * writer needs a new buffer whose size is at least the specified minimum 843 * size. 844 */ 845 private void flushFullBuffer(int minSize) { 846 flushedBuffers.add(new LiteralByteString(buffer)); 847 flushedBuffersTotalBytes += buffer.length; 848 // We want to increase our total capacity by 50%, but as a minimum, 849 // the new buffer should also at least be >= minSize and 850 // >= initial Capacity. 851 int newSize = Math.max(initialCapacity, 852 Math.max(minSize, flushedBuffersTotalBytes >>> 1)); 853 buffer = new byte[newSize]; 854 bufferPos = 0; 855 } 856 857 /** 858 * Internal function used by {@link #toByteString()}. The current buffer may 859 * or may not be full, but it needs to be flushed. 860 */ 861 private void flushLastBuffer() { 862 if (bufferPos < buffer.length) { 863 if (bufferPos > 0) { 864 byte[] bufferCopy = copyArray(buffer, bufferPos); 865 flushedBuffers.add(new LiteralByteString(bufferCopy)); 866 } 867 // We reuse this buffer for further writes. 868 } else { 869 // Buffer is completely full. Huzzah. 870 flushedBuffers.add(new LiteralByteString(buffer)); 871 // 99% of the time, we're not going to use this OutputStream again. 872 // We set buffer to an empty byte stream so that we're handling this 873 // case without wasting space. In the rare case that more writes 874 // *do* occur, this empty buffer will be flushed and an appropriately 875 // sized new buffer will be created. 876 buffer = EMPTY_BYTE_ARRAY; 877 } 878 flushedBuffersTotalBytes += bufferPos; 879 bufferPos = 0; 880 } 881 } 882 883 /** 884 * Constructs a new {@code ByteString} builder, which allows you to 885 * efficiently construct a {@code ByteString} by writing to a {@link 886 * CodedOutputStream}. Using this is much more efficient than calling {@code 887 * newOutput()} and wrapping that in a {@code CodedOutputStream}. 888 * 889 * <p>This is package-private because it's a somewhat confusing interface. 890 * Users can call {@link Message#toByteString()} instead of calling this 891 * directly. 892 * 893 * @param size The target byte size of the {@code ByteString}. You must write 894 * exactly this many bytes before building the result. 895 * @return the builder 896 */ 897 static CodedBuilder newCodedBuilder(int size) { 898 return new CodedBuilder(size); 899 } 900 901 /** See {@link ByteString#newCodedBuilder(int)}. */ 902 static final class CodedBuilder { 903 private final CodedOutputStream output; 904 private final byte[] buffer; 905 906 private CodedBuilder(int size) { 907 buffer = new byte[size]; 908 output = CodedOutputStream.newInstance(buffer); 909 } 910 911 public ByteString build() { 912 output.checkNoSpaceLeft(); 913 914 // We can be confident that the CodedOutputStream will not modify the 915 // underlying bytes anymore because it already wrote all of them. So, 916 // no need to make a copy. 917 return new LiteralByteString(buffer); 918 } 919 920 public CodedOutputStream getCodedOutput() { 921 return output; 922 } 923 } 924 925 // ================================================================= 926 // Methods {@link RopeByteString} needs on instances, which aren't part of the 927 // public API. 928 929 /** 930 * Return the depth of the tree representing this {@code ByteString}, if any, 931 * whose root is this node. If this is a leaf node, return 0. 932 * 933 * @return tree depth or zero 934 */ 935 protected abstract int getTreeDepth(); 936 937 /** 938 * Return {@code true} if this ByteString is literal (a leaf node) or a 939 * flat-enough tree in the sense of {@link RopeByteString}. 940 * 941 * @return true if the tree is flat enough 942 */ 943 protected abstract boolean isBalanced(); 944 945 /** 946 * Return the cached hash code if available. 947 * 948 * @return value of cached hash code or 0 if not computed yet 949 */ 950 protected abstract int peekCachedHashCode(); 951 952 /** 953 * Compute the hash across the value bytes starting with the given hash, and 954 * return the result. This is used to compute the hash across strings 955 * represented as a set of pieces by allowing the hash computation to be 956 * continued from piece to piece. 957 * 958 * @param h starting hash value 959 * @param offset offset into this value to start looking at data values 960 * @param length number of data values to include in the hash computation 961 * @return ending hash value 962 */ 963 protected abstract int partialHash(int h, int offset, int length); 964 965 @Override 966 public String toString() { 967 return String.format("<ByteString@%s size=%d>", 968 Integer.toHexString(System.identityHashCode(this)), size()); 969 } 970 } 971