1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
package com.google.protobuf;

import com.google.protobuf.Descriptors.Descriptor;
import com.google.protobuf.Descriptors.FieldDescriptor;
import com.google.protobuf.Descriptors.EnumDescriptor;
import com.google.protobuf.Descriptors.EnumValueDescriptor;

import java.io.IOException;
import java.nio.CharBuffer;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provide text parsing and formatting support for proto2 instances.
 * The implementation largely follows google/protobuf/text_format.cc.
 *
 * <p>This class only exposes static helpers and cannot be instantiated.
 *
 * @author wenboz (at) google.com Wenbo Zhu
 * @author kenton (at) google.com Kenton Varda
 */
public final class TextFormat {
  /** Not instantiable: all functionality is exposed through static methods. */
  private TextFormat() {}

  /** Printer with default settings (multi-line output, non-ASCII escaped). */
  private static final Printer DEFAULT_PRINTER = new Printer();
  /** Printer configured with single-line mode enabled. */
  private static final Printer SINGLE_LINE_PRINTER =
      (new Printer()).setSingleLineMode(true);
  /** Printer configured with non-ASCII escaping disabled. */
  private static final Printer UNICODE_PRINTER =
      (new Printer()).setEscapeNonAscii(false);

  /**
   * Outputs a textual representation of the Protocol Message supplied into
   * the parameter output. (This representation is the new version of the
   * classic "ProtocolPrinter" output from the original Protocol Buffer system)
   *
   * @param message the message (or builder) to print
   * @param output the destination the text is appended to
   * @throws IOException if appending to {@code output} fails
   */
  public static void print(final MessageOrBuilder message, final Appendable output)
                           throws IOException {
    DEFAULT_PRINTER.print(message, new TextGenerator(output));
  }

  /**
   * Outputs a textual representation of {@code fields} to {@code output}.
   *
   * @param fields the unknown fields to print
   * @param output the destination the text is appended to
   * @throws IOException if appending to {@code output} fails
   */
  public static void print(final UnknownFieldSet fields,
                           final Appendable output)
                           throws IOException {
    DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
  }

  /**
   * Generates a human readable form of this message, useful for debugging and
   * other purposes, with no newline characters.
84 */ 85 public static String shortDebugString(final MessageOrBuilder message) { 86 try { 87 final StringBuilder sb = new StringBuilder(); 88 SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); 89 // Single line mode currently might have an extra space at the end. 90 return sb.toString().trim(); 91 } catch (IOException e) { 92 throw new IllegalStateException(e); 93 } 94 } 95 96 /** 97 * Generates a human readable form of the unknown fields, useful for debugging 98 * and other purposes, with no newline characters. 99 */ 100 public static String shortDebugString(final UnknownFieldSet fields) { 101 try { 102 final StringBuilder sb = new StringBuilder(); 103 SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb)); 104 // Single line mode currently might have an extra space at the end. 105 return sb.toString().trim(); 106 } catch (IOException e) { 107 throw new IllegalStateException(e); 108 } 109 } 110 111 /** 112 * Like {@code print()}, but writes directly to a {@code String} and 113 * returns it. 114 */ 115 public static String printToString(final MessageOrBuilder message) { 116 try { 117 final StringBuilder text = new StringBuilder(); 118 print(message, text); 119 return text.toString(); 120 } catch (IOException e) { 121 throw new IllegalStateException(e); 122 } 123 } 124 125 /** 126 * Like {@code print()}, but writes directly to a {@code String} and 127 * returns it. 128 */ 129 public static String printToString(final UnknownFieldSet fields) { 130 try { 131 final StringBuilder text = new StringBuilder(); 132 print(fields, text); 133 return text.toString(); 134 } catch (IOException e) { 135 throw new IllegalStateException(e); 136 } 137 } 138 139 /** 140 * Same as {@code printToString()}, except that non-ASCII characters 141 * in string type fields are not escaped in backslash+octals. 
142 */ 143 public static String printToUnicodeString(final MessageOrBuilder message) { 144 try { 145 final StringBuilder text = new StringBuilder(); 146 UNICODE_PRINTER.print(message, new TextGenerator(text)); 147 return text.toString(); 148 } catch (IOException e) { 149 throw new IllegalStateException(e); 150 } 151 } 152 153 /** 154 * Same as {@code printToString()}, except that non-ASCII characters 155 * in string type fields are not escaped in backslash+octals. 156 */ 157 public static String printToUnicodeString(final UnknownFieldSet fields) { 158 try { 159 final StringBuilder text = new StringBuilder(); 160 UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text)); 161 return text.toString(); 162 } catch (IOException e) { 163 throw new IllegalStateException(e); 164 } 165 } 166 167 public static void printField(final FieldDescriptor field, 168 final Object value, 169 final Appendable output) 170 throws IOException { 171 DEFAULT_PRINTER.printField(field, value, new TextGenerator(output)); 172 } 173 174 public static String printFieldToString(final FieldDescriptor field, 175 final Object value) { 176 try { 177 final StringBuilder text = new StringBuilder(); 178 printField(field, value, text); 179 return text.toString(); 180 } catch (IOException e) { 181 throw new IllegalStateException(e); 182 } 183 } 184 185 /** 186 * Outputs a textual representation of the value of given field value. 
187 * 188 * @param field the descriptor of the field 189 * @param value the value of the field 190 * @param output the output to which to append the formatted value 191 * @throws ClassCastException if the value is not appropriate for the 192 * given field descriptor 193 * @throws IOException if there is an exception writing to the output 194 */ 195 public static void printFieldValue(final FieldDescriptor field, 196 final Object value, 197 final Appendable output) 198 throws IOException { 199 DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output)); 200 } 201 202 /** 203 * Outputs a textual representation of the value of an unknown field. 204 * 205 * @param tag the field's tag number 206 * @param value the value of the field 207 * @param output the output to which to append the formatted value 208 * @throws ClassCastException if the value is not appropriate for the 209 * given field descriptor 210 * @throws IOException if there is an exception writing to the output 211 */ 212 public static void printUnknownFieldValue(final int tag, 213 final Object value, 214 final Appendable output) 215 throws IOException { 216 printUnknownFieldValue(tag, value, new TextGenerator(output)); 217 } 218 219 private static void printUnknownFieldValue(final int tag, 220 final Object value, 221 final TextGenerator generator) 222 throws IOException { 223 switch (WireFormat.getTagWireType(tag)) { 224 case WireFormat.WIRETYPE_VARINT: 225 generator.print(unsignedToString((Long) value)); 226 break; 227 case WireFormat.WIRETYPE_FIXED32: 228 generator.print( 229 String.format((Locale) null, "0x%08x", (Integer) value)); 230 break; 231 case WireFormat.WIRETYPE_FIXED64: 232 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 233 break; 234 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 235 generator.print("\""); 236 generator.print(escapeBytes((ByteString) value)); 237 generator.print("\""); 238 break; 239 case WireFormat.WIRETYPE_START_GROUP: 240 
DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator); 241 break; 242 default: 243 throw new IllegalArgumentException("Bad tag: " + tag); 244 } 245 } 246 247 /** Helper class for converting protobufs to text. */ 248 private static final class Printer { 249 /** Whether to omit newlines from the output. */ 250 boolean singleLineMode = false; 251 252 /** Whether to escape non ASCII characters with backslash and octal. */ 253 boolean escapeNonAscii = true; 254 255 private Printer() {} 256 257 /** Setter of singleLineMode */ 258 private Printer setSingleLineMode(boolean singleLineMode) { 259 this.singleLineMode = singleLineMode; 260 return this; 261 } 262 263 /** Setter of escapeNonAscii */ 264 private Printer setEscapeNonAscii(boolean escapeNonAscii) { 265 this.escapeNonAscii = escapeNonAscii; 266 return this; 267 } 268 269 private void print(final MessageOrBuilder message, final TextGenerator generator) 270 throws IOException { 271 for (Map.Entry<FieldDescriptor, Object> field 272 : message.getAllFields().entrySet()) { 273 printField(field.getKey(), field.getValue(), generator); 274 } 275 printUnknownFields(message.getUnknownFields(), generator); 276 } 277 278 private void printField(final FieldDescriptor field, final Object value, 279 final TextGenerator generator) throws IOException { 280 if (field.isRepeated()) { 281 // Repeated field. Print each element. 282 for (Object element : (List<?>) value) { 283 printSingleField(field, element, generator); 284 } 285 } else { 286 printSingleField(field, value, generator); 287 } 288 } 289 290 private void printSingleField(final FieldDescriptor field, 291 final Object value, 292 final TextGenerator generator) 293 throws IOException { 294 if (field.isExtension()) { 295 generator.print("["); 296 // We special-case MessageSet elements for compatibility with proto1. 
297 if (field.getContainingType().getOptions().getMessageSetWireFormat() 298 && (field.getType() == FieldDescriptor.Type.MESSAGE) 299 && (field.isOptional()) 300 // object equality 301 && (field.getExtensionScope() == field.getMessageType())) { 302 generator.print(field.getMessageType().getFullName()); 303 } else { 304 generator.print(field.getFullName()); 305 } 306 generator.print("]"); 307 } else { 308 if (field.getType() == FieldDescriptor.Type.GROUP) { 309 // Groups must be serialized with their original capitalization. 310 generator.print(field.getMessageType().getName()); 311 } else { 312 generator.print(field.getName()); 313 } 314 } 315 316 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 317 if (singleLineMode) { 318 generator.print(" { "); 319 } else { 320 generator.print(" {\n"); 321 generator.indent(); 322 } 323 } else { 324 generator.print(": "); 325 } 326 327 printFieldValue(field, value, generator); 328 329 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 330 if (singleLineMode) { 331 generator.print("} "); 332 } else { 333 generator.outdent(); 334 generator.print("}\n"); 335 } 336 } else { 337 if (singleLineMode) { 338 generator.print(" "); 339 } else { 340 generator.print("\n"); 341 } 342 } 343 } 344 345 private void printFieldValue(final FieldDescriptor field, 346 final Object value, 347 final TextGenerator generator) 348 throws IOException { 349 switch (field.getType()) { 350 case INT32: 351 case SINT32: 352 case SFIXED32: 353 generator.print(((Integer) value).toString()); 354 break; 355 356 case INT64: 357 case SINT64: 358 case SFIXED64: 359 generator.print(((Long) value).toString()); 360 break; 361 362 case BOOL: 363 generator.print(((Boolean) value).toString()); 364 break; 365 366 case FLOAT: 367 generator.print(((Float) value).toString()); 368 break; 369 370 case DOUBLE: 371 generator.print(((Double) value).toString()); 372 break; 373 374 case UINT32: 375 case FIXED32: 376 generator.print(unsignedToString((Integer) 
value)); 377 break; 378 379 case UINT64: 380 case FIXED64: 381 generator.print(unsignedToString((Long) value)); 382 break; 383 384 case STRING: 385 generator.print("\""); 386 generator.print(escapeNonAscii ? 387 escapeText((String) value) : 388 (String) value); 389 generator.print("\""); 390 break; 391 392 case BYTES: 393 generator.print("\""); 394 generator.print(escapeBytes((ByteString) value)); 395 generator.print("\""); 396 break; 397 398 case ENUM: 399 generator.print(((EnumValueDescriptor) value).getName()); 400 break; 401 402 case MESSAGE: 403 case GROUP: 404 print((Message) value, generator); 405 break; 406 } 407 } 408 409 private void printUnknownFields(final UnknownFieldSet unknownFields, 410 final TextGenerator generator) 411 throws IOException { 412 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : 413 unknownFields.asMap().entrySet()) { 414 final int number = entry.getKey(); 415 final UnknownFieldSet.Field field = entry.getValue(); 416 printUnknownField(number, WireFormat.WIRETYPE_VARINT, 417 field.getVarintList(), generator); 418 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, 419 field.getFixed32List(), generator); 420 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, 421 field.getFixed64List(), generator); 422 printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED, 423 field.getLengthDelimitedList(), generator); 424 for (final UnknownFieldSet value : field.getGroupList()) { 425 generator.print(entry.getKey().toString()); 426 if (singleLineMode) { 427 generator.print(" { "); 428 } else { 429 generator.print(" {\n"); 430 generator.indent(); 431 } 432 printUnknownFields(value, generator); 433 if (singleLineMode) { 434 generator.print("} "); 435 } else { 436 generator.outdent(); 437 generator.print("}\n"); 438 } 439 } 440 } 441 } 442 443 private void printUnknownField(final int number, 444 final int wireType, 445 final List<?> values, 446 final TextGenerator generator) 447 throws IOException { 448 for (final Object value : 
values) { 449 generator.print(String.valueOf(number)); 450 generator.print(": "); 451 printUnknownFieldValue(wireType, value, generator); 452 generator.print(singleLineMode ? " " : "\n"); 453 } 454 } 455 } 456 457 /** Convert an unsigned 32-bit integer to a string. */ 458 private static String unsignedToString(final int value) { 459 if (value >= 0) { 460 return Integer.toString(value); 461 } else { 462 return Long.toString(((long) value) & 0x00000000FFFFFFFFL); 463 } 464 } 465 466 /** Convert an unsigned 64-bit integer to a string. */ 467 private static String unsignedToString(final long value) { 468 if (value >= 0) { 469 return Long.toString(value); 470 } else { 471 // Pull off the most-significant bit so that BigInteger doesn't think 472 // the number is negative, then set it again using setBit(). 473 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL) 474 .setBit(63).toString(); 475 } 476 } 477 478 /** 479 * An inner class for writing text to the output stream. 480 */ 481 private static final class TextGenerator { 482 private final Appendable output; 483 private final StringBuilder indent = new StringBuilder(); 484 private boolean atStartOfLine = true; 485 486 private TextGenerator(final Appendable output) { 487 this.output = output; 488 } 489 490 /** 491 * Indent text by two spaces. After calling Indent(), two spaces will be 492 * inserted at the beginning of each line of text. Indent() may be called 493 * multiple times to produce deeper indents. 494 */ 495 public void indent() { 496 indent.append(" "); 497 } 498 499 /** 500 * Reduces the current indent level by two spaces, or crashes if the indent 501 * level is zero. 502 */ 503 public void outdent() { 504 final int length = indent.length(); 505 if (length == 0) { 506 throw new IllegalArgumentException( 507 " Outdent() without matching Indent()."); 508 } 509 indent.delete(length - 2, length); 510 } 511 512 /** 513 * Print text to the output stream. 
514 */ 515 public void print(final CharSequence text) throws IOException { 516 final int size = text.length(); 517 int pos = 0; 518 519 for (int i = 0; i < size; i++) { 520 if (text.charAt(i) == '\n') { 521 write(text.subSequence(pos, size), i - pos + 1); 522 pos = i + 1; 523 atStartOfLine = true; 524 } 525 } 526 write(text.subSequence(pos, size), size - pos); 527 } 528 529 private void write(final CharSequence data, final int size) 530 throws IOException { 531 if (size == 0) { 532 return; 533 } 534 if (atStartOfLine) { 535 atStartOfLine = false; 536 output.append(indent); 537 } 538 output.append(data); 539 } 540 } 541 542 // ================================================================= 543 // Parsing 544 545 /** 546 * Represents a stream of tokens parsed from a {@code String}. 547 * 548 * <p>The Java standard library provides many classes that you might think 549 * would be useful for implementing this, but aren't. For example: 550 * 551 * <ul> 552 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, 553 * at least, something that would get us close to what we want -- except 554 * for one fatal flaw: It automatically un-escapes strings using Java 555 * escape sequences, which do not include all the escape sequences we 556 * need to support (e.g. '\x'). 557 * <li>{@code java.util.Scanner}: This seems like a great way at least to 558 * parse regular expressions out of a stream (so we wouldn't have to load 559 * the entire input into a single string before parsing). Sadly, 560 * {@code Scanner} requires that tokens be delimited with some delimiter. 561 * Thus, although the text "foo:" should parse to two tokens ("foo" and 562 * ":"), {@code Scanner} would recognize it only as a single token. 563 * Furthermore, {@code Scanner} provides no way to inspect the contents 564 * of delimiters, making it impossible to keep track of line and column 565 * numbers. 
* </ul>
   *
   * <p>Luckily, Java's regular expression support does manage to be useful to
   * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
   * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
   * that we need to have the entire input in one contiguous string.
   */
  private static final class Tokenizer {
    // The complete input being tokenized.
    private final CharSequence text;
    // Single matcher over `text`; its pattern is switched between WHITESPACE
    // and TOKEN, and its region start marks the next unread character.
    private final Matcher matcher;
    // The token currently being looked at; the empty string means end of input.
    private String currentToken;

    // The character index within this.text at which the current token begins.
    private int pos = 0;

    // The line and column numbers of the current token (zero-based here;
    // parseException() adds one when reporting).
    private int line = 0;
    private int column = 0;

    // The line and column numbers of the previous token (allows throwing
    // errors *after* consuming).
    private int previousLine = 0;
    private int previousColumn = 0;

    // We use possessive quantifiers (*+ and ++) because otherwise the Java
    // regex matcher has stack overflows on large inputs.
    private static final Pattern WHITESPACE =
      Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
    private static final Pattern TOKEN = Pattern.compile(
      "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
      "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
      "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
      "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
      Pattern.MULTILINE);

    // Case-insensitive spellings of infinity accepted for doubles.
    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
      "-?inf(inity)?",
      Pattern.CASE_INSENSITIVE);
    // Same as DOUBLE_INFINITY, additionally allowing a trailing "f".
    private static final Pattern FLOAT_INFINITY = Pattern.compile(
      "-?inf(inity)?f?",
      Pattern.CASE_INSENSITIVE);
    // Case-insensitive "nan" with an optional trailing "f".
    private static final Pattern FLOAT_NAN = Pattern.compile(
      "nanf?",
      Pattern.CASE_INSENSITIVE);

    /**
     * Construct a tokenizer that parses tokens from the given text. Leading
     * whitespace/comments are skipped and the first token is loaded.
     */
    private Tokenizer(final CharSequence text) {
      this.text = text;
      this.matcher = WHITESPACE.matcher(text);
      skipWhitespace();
      nextToken();
    }

    /** Are we at the end of the input? */
    public boolean atEnd() {
      return currentToken.length() == 0;
    }

    /** Advance to the next token. */
    public void nextToken() {
      previousLine = line;
      previousColumn = column;

      // Advance the line counter to the current position.
      while (pos < matcher.regionStart()) {
        if (text.charAt(pos) == '\n') {
          ++line;
          column = 0;
        } else {
          ++column;
        }
        ++pos;
      }

      // Match the next token.
      if (matcher.regionStart() == matcher.regionEnd()) {
        // EOF
        currentToken = "";
      } else {
        matcher.usePattern(TOKEN);
        if (matcher.lookingAt()) {
          currentToken = matcher.group();
          matcher.region(matcher.end(), matcher.regionEnd());
        } else {
          // Take one character.
          currentToken = String.valueOf(text.charAt(pos));
          matcher.region(pos + 1, matcher.regionEnd());
        }

        skipWhitespace();
      }
    }

    /**
     * Skip over any whitespace so that the matcher region starts at the next
     * token.
     */
    private void skipWhitespace() {
      matcher.usePattern(WHITESPACE);
      if (matcher.lookingAt()) {
        matcher.region(matcher.end(), matcher.regionEnd());
      }
    }

    /**
     * If the next token exactly matches {@code token}, consume it and return
     * {@code true}.  Otherwise, return {@code false} without doing anything.
     */
    public boolean tryConsume(final String token) {
      if (currentToken.equals(token)) {
        nextToken();
        return true;
      } else {
        return false;
      }
    }

    /**
     * If the next token exactly matches {@code token}, consume it.  Otherwise,
     * throw a {@link ParseException}.
     */
    public void consume(final String token) throws ParseException {
      if (!tryConsume(token)) {
        throw parseException("Expected \"" + token + "\".");
      }
    }

    /**
     * Returns {@code true} if the next token is an integer, but does
     * not consume it. Only the first character is inspected: a digit,
     * {@code '-'} or {@code '+'}.
     */
    public boolean lookingAtInteger() {
      if (currentToken.length() == 0) {
        return false;
      }

      final char c = currentToken.charAt(0);
      return ('0' <= c && c <= '9') ||
             c == '-' || c == '+';
    }

    /**
     * If the next token is an identifier, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     *
     * <p>A token is accepted when every character is an ASCII letter, a
     * digit, {@code '_'} or {@code '.'}. NOTE(review): an empty token (end of
     * input) passes this check and is returned as an empty identifier.
     */
    public String consumeIdentifier() throws ParseException {
      for (int i = 0; i < currentToken.length(); i++) {
        final char c = currentToken.charAt(i);
        if (('a' <= c && c <= 'z') ||
            ('A' <= c && c <= 'Z') ||
            ('0' <= c && c <= '9') ||
            (c == '_') || (c == '.')) {
          // OK
        } else {
          throw parseException("Expected identifier.");
        }
      }

      final String result = currentToken;
      nextToken();
      return result;
    }

    /**
     * If the next token is a 32-bit signed integer, consume it and return its
     * value.  Otherwise, throw a {@link ParseException}.
     */
    public int consumeInt32() throws ParseException {
      try {
        final int result = parseInt32(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }

    /**
     * If the next token is a 32-bit unsigned integer, consume it and return its
     * value.  Otherwise, throw a {@link ParseException}.
     */
    public int consumeUInt32() throws ParseException {
      try {
        final int result = parseUInt32(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }

    /**
     * If the next token is a 64-bit signed integer, consume it and return its
     * value.  Otherwise, throw a {@link ParseException}.
     */
    public long consumeInt64() throws ParseException {
      try {
        final long result = parseInt64(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }

    /**
     * If the next token is a 64-bit unsigned integer, consume it and return its
     * value.  Otherwise, throw a {@link ParseException}.
     */
    public long consumeUInt64() throws ParseException {
      try {
        final long result = parseUInt64(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw integerParseException(e);
      }
    }

    /**
     * If the next token is a double, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     */
    public double consumeDouble() throws ParseException {
      // We need to parse infinity and nan separately because
      // Double.parseDouble() does not accept "inf", "infinity", or "nan".
      if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
        final boolean negative = currentToken.startsWith("-");
        nextToken();
        return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
      }
      if (currentToken.equalsIgnoreCase("nan")) {
        nextToken();
        return Double.NaN;
      }
      try {
        final double result = Double.parseDouble(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw floatParseException(e);
      }
    }

    /**
     * If the next token is a float, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     */
    public float consumeFloat() throws ParseException {
      // We need to parse infinity and nan separately because
      // Float.parseFloat() does not accept "inf", "infinity", or "nan".
      if (FLOAT_INFINITY.matcher(currentToken).matches()) {
        final boolean negative = currentToken.startsWith("-");
        nextToken();
        return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
      }
      if (FLOAT_NAN.matcher(currentToken).matches()) {
        nextToken();
        return Float.NaN;
      }
      try {
        final float result = Float.parseFloat(currentToken);
        nextToken();
        return result;
      } catch (NumberFormatException e) {
        throw floatParseException(e);
      }
    }

    /**
     * If the next token is a boolean, consume it and return its value.
     * Otherwise, throw a {@link ParseException}.
     *
     * <p>Accepted spellings are {@code true}/{@code t}/{@code 1} and
     * {@code false}/{@code f}/{@code 0}.
     */
    public boolean consumeBoolean() throws ParseException {
      if (currentToken.equals("true") ||
          currentToken.equals("t") ||
          currentToken.equals("1")) {
        nextToken();
        return true;
      } else if (currentToken.equals("false") ||
                 currentToken.equals("f") ||
                 currentToken.equals("0")) {
        nextToken();
        return false;
      } else {
        throw parseException("Expected \"true\" or \"false\".");
      }
    }

    /**
     * If the next token is a string, consume it and return its (unescaped)
     * value.  Otherwise, throw a {@link ParseException}.
     */
    public String consumeString() throws ParseException {
      return consumeByteString().toStringUtf8();
    }

    /**
     * If the next token is a string, consume it, unescape it as a
     * {@link ByteString}, and return it.  Otherwise, throw a
     * {@link ParseException}.
     */
    public ByteString consumeByteString() throws ParseException {
      List<ByteString> list = new ArrayList<ByteString>();
      consumeByteString(list);
      // Keep consuming while further string literals follow directly.
      while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
        consumeByteString(list);
      }
      return ByteString.copyFrom(list);
    }

    /**
     * Like {@link #consumeByteString()} but adds each token of the string to
     * the given list.  String literals (whether bytes or text) may come in
     * multiple adjacent tokens which are automatically concatenated, like in
     * C or Python.
     */
    private void consumeByteString(List<ByteString> list) throws ParseException {
      final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
                                                   : '\0';
      if (quote != '\"' && quote != '\'') {
        throw parseException("Expected string.");
      }

      if (currentToken.length() < 2 ||
          currentToken.charAt(currentToken.length() - 1) != quote) {
        throw parseException("String missing ending quote.");
      }

      try {
        // Strip the surrounding quotes before unescaping.
        final String escaped =
            currentToken.substring(1, currentToken.length() - 1);
        final ByteString result = unescapeBytes(escaped);
        nextToken();
        list.add(result);
      } catch (InvalidEscapeSequenceException e) {
        throw parseException(e.getMessage());
      }
    }

    /**
     * Returns a {@link ParseException} with the current line and column
     * numbers in the description, suitable for throwing.
     */
    public ParseException parseException(final String description) {
      // Note:  People generally prefer one-based line and column numbers.
      return new ParseException(
        line + 1, column + 1, description);
    }

    /**
     * Returns a {@link ParseException} with the line and column numbers of
     * the previous token in the description, suitable for throwing.
     */
    public ParseException parseExceptionPreviousToken(
        final String description) {
      // Note:  People generally prefer one-based line and column numbers.
      return new ParseException(
        previousLine + 1, previousColumn + 1, description);
    }

    /**
     * Constructs an appropriate {@link ParseException} for the given
     * {@code NumberFormatException} when trying to parse an integer.
     */
    private ParseException integerParseException(
        final NumberFormatException e) {
      return parseException("Couldn't parse integer: " + e.getMessage());
    }

    /**
     * Constructs an appropriate {@link ParseException} for the given
     * {@code NumberFormatException} when trying to parse a float or double.
     */
    private ParseException floatParseException(final NumberFormatException e) {
      return parseException("Couldn't parse number: " + e.getMessage());
    }
  }

  /** Thrown when parsing an invalid text format message. */
  public static class ParseException extends IOException {
    private static final long serialVersionUID = 3196188060225107702L;

    private final int line;
    private final int column;

    /** Create a new instance, with -1 as the line and column numbers. */
    public ParseException(final String message) {
      this(-1, -1, message);
    }

    /**
     * Create a new instance. The exception message is built as
     * {@code line:column: message}.
     *
     * @param line the line number where the parse error occurred,
     * using 1-offset.
     * @param column the column number where the parser error occurred,
     * using 1-offset.
     * @param message the description of the parse error
     */
    public ParseException(final int line, final int column,
        final String message) {
      super(Integer.toString(line) + ":" + column + ": " + message);
      this.line = line;
      this.column = column;
    }

    /**
     * Return the line where the parse exception occurred, or -1 when
     * none is provided. The value is specified as 1-offset, so the first
     * line is line 1.
     */
    public int getLine() {
      return line;
    }

    /**
     * Return the column where the parse exception occurred, or -1 when
     * none is provided. The value is specified as 1-offset, so the first
     * column is column 1.
     */
    public int getColumn() {
      return column;
    }
  }

  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}. No extensions are recognized: the empty
   * extension registry is used.
   */
  public static void merge(final Readable input,
                           final Message.Builder builder)
                           throws IOException {
    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
  }

  /**
   * Parse a text-format message from {@code input} and merge the contents
   * into {@code builder}.
1004 */ 1005 public static void merge(final CharSequence input, 1006 final Message.Builder builder) 1007 throws ParseException { 1008 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1009 } 1010 1011 /** 1012 * Parse a text-format message from {@code input} and merge the contents 1013 * into {@code builder}. Extensions will be recognized if they are 1014 * registered in {@code extensionRegistry}. 1015 */ 1016 public static void merge(final Readable input, 1017 final ExtensionRegistry extensionRegistry, 1018 final Message.Builder builder) 1019 throws IOException { 1020 // Read the entire input to a String then parse that. 1021 1022 // If StreamTokenizer were not quite so crippled, or if there were a kind 1023 // of Reader that could read in chunks that match some particular regex, 1024 // or if we wanted to write a custom Reader to tokenize our stream, then 1025 // we would not have to read to one big String. Alas, none of these is 1026 // the case. Oh well. 1027 1028 merge(toStringBuilder(input), extensionRegistry, builder); 1029 } 1030 1031 private static final int BUFFER_SIZE = 4096; 1032 1033 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1034 // overhead is worthwhile 1035 private static StringBuilder toStringBuilder(final Readable input) 1036 throws IOException { 1037 final StringBuilder text = new StringBuilder(); 1038 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1039 while (true) { 1040 final int n = input.read(buffer); 1041 if (n == -1) { 1042 break; 1043 } 1044 buffer.flip(); 1045 text.append(buffer, 0, n); 1046 } 1047 return text; 1048 } 1049 1050 /** 1051 * Parse a text-format message from {@code input} and merge the contents 1052 * into {@code builder}. Extensions will be recognized if they are 1053 * registered in {@code extensionRegistry}. 
1054 */ 1055 public static void merge(final CharSequence input, 1056 final ExtensionRegistry extensionRegistry, 1057 final Message.Builder builder) 1058 throws ParseException { 1059 final Tokenizer tokenizer = new Tokenizer(input); 1060 1061 while (!tokenizer.atEnd()) { 1062 mergeField(tokenizer, extensionRegistry, builder); 1063 } 1064 } 1065 1066 /** 1067 * Parse a single field from {@code tokenizer} and merge it into 1068 * {@code builder}. 1069 */ 1070 private static void mergeField(final Tokenizer tokenizer, 1071 final ExtensionRegistry extensionRegistry, 1072 final Message.Builder builder) 1073 throws ParseException { 1074 FieldDescriptor field; 1075 final Descriptor type = builder.getDescriptorForType(); 1076 ExtensionRegistry.ExtensionInfo extension = null; 1077 1078 if (tokenizer.tryConsume("[")) { 1079 // An extension. 1080 final StringBuilder name = 1081 new StringBuilder(tokenizer.consumeIdentifier()); 1082 while (tokenizer.tryConsume(".")) { 1083 name.append('.'); 1084 name.append(tokenizer.consumeIdentifier()); 1085 } 1086 1087 extension = extensionRegistry.findExtensionByName(name.toString()); 1088 1089 if (extension == null) { 1090 throw tokenizer.parseExceptionPreviousToken( 1091 "Extension \"" + name + "\" not found in the ExtensionRegistry."); 1092 } else if (extension.descriptor.getContainingType() != type) { 1093 throw tokenizer.parseExceptionPreviousToken( 1094 "Extension \"" + name + "\" does not extend message type \"" + 1095 type.getFullName() + "\"."); 1096 } 1097 1098 tokenizer.consume("]"); 1099 1100 field = extension.descriptor; 1101 } else { 1102 final String name = tokenizer.consumeIdentifier(); 1103 field = type.findFieldByName(name); 1104 1105 // Group names are expected to be capitalized as they appear in the 1106 // .proto file, which actually matches their type names, not their field 1107 // names. 1108 if (field == null) { 1109 // Explicitly specify US locale so that this code does not break when 1110 // executing in Turkey. 
1111 final String lowerName = name.toLowerCase(Locale.US); 1112 field = type.findFieldByName(lowerName); 1113 // If the case-insensitive match worked but the field is NOT a group, 1114 if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { 1115 field = null; 1116 } 1117 } 1118 // Again, special-case group names as described above. 1119 if (field != null && field.getType() == FieldDescriptor.Type.GROUP && 1120 !field.getMessageType().getName().equals(name)) { 1121 field = null; 1122 } 1123 1124 if (field == null) { 1125 throw tokenizer.parseExceptionPreviousToken( 1126 "Message type \"" + type.getFullName() + 1127 "\" has no field named \"" + name + "\"."); 1128 } 1129 } 1130 1131 Object value = null; 1132 1133 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1134 tokenizer.tryConsume(":"); // optional 1135 1136 final String endToken; 1137 if (tokenizer.tryConsume("<")) { 1138 endToken = ">"; 1139 } else { 1140 tokenizer.consume("{"); 1141 endToken = "}"; 1142 } 1143 1144 final Message.Builder subBuilder; 1145 if (extension == null) { 1146 subBuilder = builder.newBuilderForField(field); 1147 } else { 1148 subBuilder = extension.defaultInstance.newBuilderForType(); 1149 } 1150 1151 while (!tokenizer.tryConsume(endToken)) { 1152 if (tokenizer.atEnd()) { 1153 throw tokenizer.parseException( 1154 "Expected \"" + endToken + "\"."); 1155 } 1156 mergeField(tokenizer, extensionRegistry, subBuilder); 1157 } 1158 1159 value = subBuilder.buildPartial(); 1160 1161 } else { 1162 tokenizer.consume(":"); 1163 1164 switch (field.getType()) { 1165 case INT32: 1166 case SINT32: 1167 case SFIXED32: 1168 value = tokenizer.consumeInt32(); 1169 break; 1170 1171 case INT64: 1172 case SINT64: 1173 case SFIXED64: 1174 value = tokenizer.consumeInt64(); 1175 break; 1176 1177 case UINT32: 1178 case FIXED32: 1179 value = tokenizer.consumeUInt32(); 1180 break; 1181 1182 case UINT64: 1183 case FIXED64: 1184 value = tokenizer.consumeUInt64(); 1185 break; 1186 1187 case 
FLOAT: 1188 value = tokenizer.consumeFloat(); 1189 break; 1190 1191 case DOUBLE: 1192 value = tokenizer.consumeDouble(); 1193 break; 1194 1195 case BOOL: 1196 value = tokenizer.consumeBoolean(); 1197 break; 1198 1199 case STRING: 1200 value = tokenizer.consumeString(); 1201 break; 1202 1203 case BYTES: 1204 value = tokenizer.consumeByteString(); 1205 break; 1206 1207 case ENUM: 1208 final EnumDescriptor enumType = field.getEnumType(); 1209 1210 if (tokenizer.lookingAtInteger()) { 1211 final int number = tokenizer.consumeInt32(); 1212 value = enumType.findValueByNumber(number); 1213 if (value == null) { 1214 throw tokenizer.parseExceptionPreviousToken( 1215 "Enum type \"" + enumType.getFullName() + 1216 "\" has no value with number " + number + '.'); 1217 } 1218 } else { 1219 final String id = tokenizer.consumeIdentifier(); 1220 value = enumType.findValueByName(id); 1221 if (value == null) { 1222 throw tokenizer.parseExceptionPreviousToken( 1223 "Enum type \"" + enumType.getFullName() + 1224 "\" has no value named \"" + id + "\"."); 1225 } 1226 } 1227 1228 break; 1229 1230 case MESSAGE: 1231 case GROUP: 1232 throw new RuntimeException("Can't get here."); 1233 } 1234 } 1235 1236 if (field.isRepeated()) { 1237 builder.addRepeatedField(field, value); 1238 } else { 1239 builder.setField(field, value); 1240 } 1241 } 1242 1243 // ================================================================= 1244 // Utility functions 1245 // 1246 // Some of these methods are package-private because Descriptors.java uses 1247 // them. 1248 1249 /** 1250 * Escapes bytes in the format used in protocol buffer text format, which 1251 * is the same as the format used for C string literals. All bytes 1252 * that are not printable 7-bit ASCII characters are escaped, as well as 1253 * backslash, single-quote, and double-quote characters. Characters for 1254 * which no defined short-hand escape sequence is defined will be escaped 1255 * using 3-digit octal sequences. 
*/
  static String escapeBytes(final ByteString input) {
    final StringBuilder builder = new StringBuilder(input.size());
    for (int i = 0; i < input.size(); i++) {
      final byte b = input.byteAt(i);
      switch (b) {
        // Java does not recognize \a or \v, apparently.
        case 0x07: builder.append("\\a" ); break;
        case '\b': builder.append("\\b" ); break;
        case '\f': builder.append("\\f" ); break;
        case '\n': builder.append("\\n" ); break;
        case '\r': builder.append("\\r" ); break;
        case '\t': builder.append("\\t" ); break;
        case 0x0b: builder.append("\\v" ); break;
        case '\\': builder.append("\\\\"); break;
        case '\'': builder.append("\\\'"); break;
        case '"' : builder.append("\\\""); break;
        default:
          // Only 0x20 (space) through 0x7e (tilde) are printable ASCII.
          // Bytes are signed, so bytes with the high-order bit set are
          // negative and fail the lower bound; 0x7f (DEL) fails the upper
          // bound.  Everything that is not printable gets a 3-digit octal
          // escape.
          if (b >= 0x20 && b <= 0x7e) {
            builder.append((char) b);
          } else {
            builder.append('\\');
            builder.append((char) ('0' + ((b >>> 6) & 3)));
            builder.append((char) ('0' + ((b >>> 3) & 7)));
            builder.append((char) ('0' + (b & 7)));
          }
          break;
      }
    }
    return builder.toString();
  }

  /**
   * Un-escape a byte sequence as escaped using
   * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
   * "\x") are also recognized.
   *
   * @throws InvalidEscapeSequenceException if a backslash is the last
   *     character, is followed by an unrecognized character, or starts a
   *     "\x" escape that has no hex digit
   */
  static ByteString unescapeBytes(final CharSequence charString)
      throws InvalidEscapeSequenceException {
    // First convert the Java character sequence to UTF-8 bytes.
    final ByteString input = ByteString.copyFromUtf8(charString.toString());
    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
    // escapes can all be expressed with ASCII characters, so it is safe to
    // operate on bytes here.
    //
    // Unescaping the input byte array will result in a byte sequence that's no
    // longer than the input.  That's because each escape sequence is between
    // two and four bytes long and stands for a single byte.
    final int size = input.size();
    final byte[] result = new byte[size];
    int pos = 0;

    // "next" always indexes the first byte that has not been consumed yet.
    int next = 0;
    while (next < size) {
      final byte c = input.byteAt(next++);
      if (c != '\\') {
        result[pos++] = c;
        continue;
      }

      if (next >= size) {
        throw new InvalidEscapeSequenceException(
          "Invalid escape sequence: '\\' at end of string.");
      }

      final byte selector = input.byteAt(next++);
      if (isOctal(selector)) {
        // Octal escape: one digit, optionally followed by up to two more.
        int code = digitValue(selector);
        for (int extra = 0;
             extra < 2 && next < size && isOctal(input.byteAt(next));
             extra++) {
          code = code * 8 + digitValue(input.byteAt(next++));
        }
        // TODO:  Check that 0 <= code && code <= 0xFF.
        result[pos++] = (byte) code;
        continue;
      }

      switch (selector) {
        case 'a' : result[pos++] = 0x07; break;
        case 'b' : result[pos++] = '\b'; break;
        case 'f' : result[pos++] = '\f'; break;
        case 'n' : result[pos++] = '\n'; break;
        case 'r' : result[pos++] = '\r'; break;
        case 't' : result[pos++] = '\t'; break;
        case 'v' : result[pos++] = 0x0b; break;
        case '\\': result[pos++] = '\\'; break;
        case '\'': result[pos++] = '\''; break;
        case '"' : result[pos++] = '\"'; break;

        case 'x': {
          // Hex escape: one mandatory digit, optionally followed by a second.
          if (next >= size || !isHex(input.byteAt(next))) {
            throw new InvalidEscapeSequenceException(
              "Invalid escape sequence: '\\x' with no digits");
          }
          int code = digitValue(input.byteAt(next++));
          if (next < size && isHex(input.byteAt(next))) {
            code = code * 16 + digitValue(input.byteAt(next++));
          }
          result[pos++] = (byte) code;
          break;
        }

        default:
          throw new InvalidEscapeSequenceException(
            "Invalid escape sequence: '\\" + (char) selector + '\'');
      }
    }

    return ByteString.copyFrom(result, 0, pos);
  }

  /**
   * Thrown by {@link TextFormat#unescapeBytes} and
   * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
   */
  static class InvalidEscapeSequenceException extends IOException {
    private static final long serialVersionUID = -8164033650142593304L;

    InvalidEscapeSequenceException(final String description) {
      super(description);
    }
  }

  /**
   * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
   * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
   * individually as a 3-digit octal escape.  Yes, it's weird.
   */
  static String escapeText(final String input) {
    return escapeBytes(ByteString.copyFromUtf8(input));
  }

  /**
   * Un-escape a text string as escaped using {@link #escapeText(String)}.
   * Two-digit hex escapes (starting with "\x") are also recognized.
   */
  static String unescapeText(final String input)
                             throws InvalidEscapeSequenceException {
    return unescapeBytes(input).toStringUtf8();
  }

  /** Is this an octal digit? */
  private static boolean isOctal(final byte c) {
    return '0' <= c && c <= '7';
  }

  /** Is this a hex digit? */
  private static boolean isHex(final byte c) {
    return ('0' <= c && c <= '9') ||
           ('a' <= c && c <= 'f') ||
           ('A' <= c && c <= 'F');
  }

  /**
   * Interpret a character as a digit (in any base up to 36) and return the
   * numeric value.  This is like {@code Character.digit()} but we don't accept
   * non-ASCII digits.
1421 */ 1422 private static int digitValue(final byte c) { 1423 if ('0' <= c && c <= '9') { 1424 return c - '0'; 1425 } else if ('a' <= c && c <= 'z') { 1426 return c - 'a' + 10; 1427 } else { 1428 return c - 'A' + 10; 1429 } 1430 } 1431 1432 /** 1433 * Parse a 32-bit signed integer from the text. Unlike the Java standard 1434 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1435 * and "0" to signify hexadecimal and octal numbers, respectively. 1436 */ 1437 static int parseInt32(final String text) throws NumberFormatException { 1438 return (int) parseInteger(text, true, false); 1439 } 1440 1441 /** 1442 * Parse a 32-bit unsigned integer from the text. Unlike the Java standard 1443 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1444 * and "0" to signify hexadecimal and octal numbers, respectively. The 1445 * result is coerced to a (signed) {@code int} when returned since Java has 1446 * no unsigned integer type. 1447 */ 1448 static int parseUInt32(final String text) throws NumberFormatException { 1449 return (int) parseInteger(text, false, false); 1450 } 1451 1452 /** 1453 * Parse a 64-bit signed integer from the text. Unlike the Java standard 1454 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1455 * and "0" to signify hexadecimal and octal numbers, respectively. 1456 */ 1457 static long parseInt64(final String text) throws NumberFormatException { 1458 return parseInteger(text, true, true); 1459 } 1460 1461 /** 1462 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard 1463 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1464 * and "0" to signify hexadecimal and octal numbers, respectively. The 1465 * result is coerced to a (signed) {@code long} when returned since Java has 1466 * no unsigned long type. 
1467 */ 1468 static long parseUInt64(final String text) throws NumberFormatException { 1469 return parseInteger(text, false, true); 1470 } 1471 1472 private static long parseInteger(final String text, 1473 final boolean isSigned, 1474 final boolean isLong) 1475 throws NumberFormatException { 1476 int pos = 0; 1477 1478 boolean negative = false; 1479 if (text.startsWith("-", pos)) { 1480 if (!isSigned) { 1481 throw new NumberFormatException("Number must be positive: " + text); 1482 } 1483 ++pos; 1484 negative = true; 1485 } 1486 1487 int radix = 10; 1488 if (text.startsWith("0x", pos)) { 1489 pos += 2; 1490 radix = 16; 1491 } else if (text.startsWith("0", pos)) { 1492 radix = 8; 1493 } 1494 1495 final String numberText = text.substring(pos); 1496 1497 long result = 0; 1498 if (numberText.length() < 16) { 1499 // Can safely assume no overflow. 1500 result = Long.parseLong(numberText, radix); 1501 if (negative) { 1502 result = -result; 1503 } 1504 1505 // Check bounds. 1506 // No need to check for 64-bit numbers since they'd have to be 16 chars 1507 // or longer to overflow. 1508 if (!isLong) { 1509 if (isSigned) { 1510 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 1511 throw new NumberFormatException( 1512 "Number out of range for 32-bit signed integer: " + text); 1513 } 1514 } else { 1515 if (result >= (1L << 32) || result < 0) { 1516 throw new NumberFormatException( 1517 "Number out of range for 32-bit unsigned integer: " + text); 1518 } 1519 } 1520 } 1521 } else { 1522 BigInteger bigValue = new BigInteger(numberText, radix); 1523 if (negative) { 1524 bigValue = bigValue.negate(); 1525 } 1526 1527 // Check bounds. 
1528 if (!isLong) { 1529 if (isSigned) { 1530 if (bigValue.bitLength() > 31) { 1531 throw new NumberFormatException( 1532 "Number out of range for 32-bit signed integer: " + text); 1533 } 1534 } else { 1535 if (bigValue.bitLength() > 32) { 1536 throw new NumberFormatException( 1537 "Number out of range for 32-bit unsigned integer: " + text); 1538 } 1539 } 1540 } else { 1541 if (isSigned) { 1542 if (bigValue.bitLength() > 63) { 1543 throw new NumberFormatException( 1544 "Number out of range for 64-bit signed integer: " + text); 1545 } 1546 } else { 1547 if (bigValue.bitLength() > 64) { 1548 throw new NumberFormatException( 1549 "Number out of range for 64-bit unsigned integer: " + text); 1550 } 1551 } 1552 } 1553 1554 result = bigValue.longValue(); 1555 } 1556 1557 return result; 1558 } 1559 } 1560