Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 package com.google.protobuf;
     32 
     33 import com.google.protobuf.Descriptors.Descriptor;
     34 import com.google.protobuf.Descriptors.FieldDescriptor;
     35 import com.google.protobuf.Descriptors.EnumDescriptor;
     36 import com.google.protobuf.Descriptors.EnumValueDescriptor;
     37 
     38 import java.io.IOException;
     39 import java.nio.CharBuffer;
     40 import java.math.BigInteger;
     41 import java.util.ArrayList;
     42 import java.util.List;
     43 import java.util.Locale;
     44 import java.util.Map;
     45 import java.util.regex.Matcher;
     46 import java.util.regex.Pattern;
     47 
     48 /**
     49  * Provide text parsing and formatting support for proto2 instances.
     50  * The implementation largely follows google/protobuf/text_format.cc.
     51  *
     52  * @author wenboz (at) google.com Wenbo Zhu
     53  * @author kenton (at) google.com Kenton Varda
     54  */
     55 public final class TextFormat {
  // Private constructor prevents instantiation from outside this class.
  private TextFormat() {}

  // Shared, pre-configured printers used by the static helpers of this class.
  // Uses the Printer defaults: multi-line output, non-ASCII escaping enabled.
  private static final Printer DEFAULT_PRINTER = new Printer();
  // Emits everything on a single line (see shortDebugString()).
  private static final Printer SINGLE_LINE_PRINTER =
      (new Printer()).setSingleLineMode(true);
  // Does not escape non-ASCII characters in string fields (see
  // printToUnicodeString()).
  private static final Printer UNICODE_PRINTER =
      (new Printer()).setEscapeNonAscii(false);
     63 
     64   /**
     65    * Outputs a textual representation of the Protocol Message supplied into
     66    * the parameter output. (This representation is the new version of the
     67    * classic "ProtocolPrinter" output from the original Protocol Buffer system)
     68    */
     69   public static void print(final MessageOrBuilder message, final Appendable output)
     70                            throws IOException {
     71     DEFAULT_PRINTER.print(message, new TextGenerator(output));
     72   }
     73 
     74   /** Outputs a textual representation of {@code fields} to {@code output}. */
     75   public static void print(final UnknownFieldSet fields,
     76                            final Appendable output)
     77                            throws IOException {
     78     DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
     79   }
     80 
     81   /**
     82    * Generates a human readable form of this message, useful for debugging and
     83    * other purposes, with no newline characters.
     84    */
     85   public static String shortDebugString(final MessageOrBuilder message) {
     86     try {
     87       final StringBuilder sb = new StringBuilder();
     88       SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
     89       // Single line mode currently might have an extra space at the end.
     90       return sb.toString().trim();
     91     } catch (IOException e) {
     92       throw new IllegalStateException(e);
     93     }
     94   }
     95 
     96   /**
     97    * Generates a human readable form of the unknown fields, useful for debugging
     98    * and other purposes, with no newline characters.
     99    */
    100   public static String shortDebugString(final UnknownFieldSet fields) {
    101     try {
    102       final StringBuilder sb = new StringBuilder();
    103       SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
    104       // Single line mode currently might have an extra space at the end.
    105       return sb.toString().trim();
    106     } catch (IOException e) {
    107       throw new IllegalStateException(e);
    108     }
    109   }
    110 
    111   /**
    112    * Like {@code print()}, but writes directly to a {@code String} and
    113    * returns it.
    114    */
    115   public static String printToString(final MessageOrBuilder message) {
    116     try {
    117       final StringBuilder text = new StringBuilder();
    118       print(message, text);
    119       return text.toString();
    120     } catch (IOException e) {
    121       throw new IllegalStateException(e);
    122     }
    123   }
    124 
    125   /**
    126    * Like {@code print()}, but writes directly to a {@code String} and
    127    * returns it.
    128    */
    129   public static String printToString(final UnknownFieldSet fields) {
    130     try {
    131       final StringBuilder text = new StringBuilder();
    132       print(fields, text);
    133       return text.toString();
    134     } catch (IOException e) {
    135       throw new IllegalStateException(e);
    136     }
    137   }
    138 
    139   /**
    140    * Same as {@code printToString()}, except that non-ASCII characters
    141    * in string type fields are not escaped in backslash+octals.
    142    */
    143   public static String printToUnicodeString(final MessageOrBuilder message) {
    144     try {
    145       final StringBuilder text = new StringBuilder();
    146       UNICODE_PRINTER.print(message, new TextGenerator(text));
    147       return text.toString();
    148     } catch (IOException e) {
    149       throw new IllegalStateException(e);
    150     }
    151   }
    152 
    153   /**
    154    * Same as {@code printToString()}, except that non-ASCII characters
    155    * in string type fields are not escaped in backslash+octals.
    156    */
    157   public static String printToUnicodeString(final UnknownFieldSet fields) {
    158     try {
    159       final StringBuilder text = new StringBuilder();
    160       UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text));
    161       return text.toString();
    162     } catch (IOException e) {
    163       throw new IllegalStateException(e);
    164     }
    165   }
    166 
    167   public static void printField(final FieldDescriptor field,
    168                                 final Object value,
    169                                 final Appendable output)
    170                                 throws IOException {
    171     DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
    172   }
    173 
    174   public static String printFieldToString(final FieldDescriptor field,
    175                                           final Object value) {
    176     try {
    177       final StringBuilder text = new StringBuilder();
    178       printField(field, value, text);
    179       return text.toString();
    180     } catch (IOException e) {
    181       throw new IllegalStateException(e);
    182     }
    183   }
    184 
    185   /**
    186    * Outputs a textual representation of the value of given field value.
    187    *
    188    * @param field the descriptor of the field
    189    * @param value the value of the field
    190    * @param output the output to which to append the formatted value
    191    * @throws ClassCastException if the value is not appropriate for the
    192    *     given field descriptor
    193    * @throws IOException if there is an exception writing to the output
    194    */
    195   public static void printFieldValue(final FieldDescriptor field,
    196                                      final Object value,
    197                                      final Appendable output)
    198                                      throws IOException {
    199     DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
    200   }
    201 
    202   /**
    203    * Outputs a textual representation of the value of an unknown field.
    204    *
    205    * @param tag the field's tag number
    206    * @param value the value of the field
    207    * @param output the output to which to append the formatted value
    208    * @throws ClassCastException if the value is not appropriate for the
    209    *     given field descriptor
    210    * @throws IOException if there is an exception writing to the output
    211    */
    212   public static void printUnknownFieldValue(final int tag,
    213                                             final Object value,
    214                                             final Appendable output)
    215                                             throws IOException {
    216     printUnknownFieldValue(tag, value, new TextGenerator(output));
    217   }
    218 
    219   private static void printUnknownFieldValue(final int tag,
    220                                              final Object value,
    221                                              final TextGenerator generator)
    222                                              throws IOException {
    223     switch (WireFormat.getTagWireType(tag)) {
    224       case WireFormat.WIRETYPE_VARINT:
    225         generator.print(unsignedToString((Long) value));
    226         break;
    227       case WireFormat.WIRETYPE_FIXED32:
    228         generator.print(
    229             String.format((Locale) null, "0x%08x", (Integer) value));
    230         break;
    231       case WireFormat.WIRETYPE_FIXED64:
    232         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
    233         break;
    234       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
    235         generator.print("\"");
    236         generator.print(escapeBytes((ByteString) value));
    237         generator.print("\"");
    238         break;
    239       case WireFormat.WIRETYPE_START_GROUP:
    240         DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
    241         break;
    242       default:
    243         throw new IllegalArgumentException("Bad tag: " + tag);
    244     }
    245   }
    246 
    247   /** Helper class for converting protobufs to text. */
    248   private static final class Printer {
    249     /** Whether to omit newlines from the output. */
    250     boolean singleLineMode = false;
    251 
    252     /** Whether to escape non ASCII characters with backslash and octal. */
    253     boolean escapeNonAscii = true;
    254 
    255     private Printer() {}
    256 
    257     /** Setter of singleLineMode */
    258     private Printer setSingleLineMode(boolean singleLineMode) {
    259       this.singleLineMode = singleLineMode;
    260       return this;
    261     }
    262 
    263     /** Setter of escapeNonAscii */
    264     private Printer setEscapeNonAscii(boolean escapeNonAscii) {
    265       this.escapeNonAscii = escapeNonAscii;
    266       return this;
    267     }
    268 
    269     private void print(final MessageOrBuilder message, final TextGenerator generator)
    270         throws IOException {
    271       for (Map.Entry<FieldDescriptor, Object> field
    272           : message.getAllFields().entrySet()) {
    273         printField(field.getKey(), field.getValue(), generator);
    274       }
    275       printUnknownFields(message.getUnknownFields(), generator);
    276     }
    277 
    278     private void printField(final FieldDescriptor field, final Object value,
    279         final TextGenerator generator) throws IOException {
    280       if (field.isRepeated()) {
    281         // Repeated field.  Print each element.
    282         for (Object element : (List<?>) value) {
    283           printSingleField(field, element, generator);
    284         }
    285       } else {
    286         printSingleField(field, value, generator);
    287       }
    288     }
    289 
    290     private void printSingleField(final FieldDescriptor field,
    291                                   final Object value,
    292                                   final TextGenerator generator)
    293                                   throws IOException {
    294       if (field.isExtension()) {
    295         generator.print("[");
    296         // We special-case MessageSet elements for compatibility with proto1.
    297         if (field.getContainingType().getOptions().getMessageSetWireFormat()
    298             && (field.getType() == FieldDescriptor.Type.MESSAGE)
    299             && (field.isOptional())
    300             // object equality
    301             && (field.getExtensionScope() == field.getMessageType())) {
    302           generator.print(field.getMessageType().getFullName());
    303         } else {
    304           generator.print(field.getFullName());
    305         }
    306         generator.print("]");
    307       } else {
    308         if (field.getType() == FieldDescriptor.Type.GROUP) {
    309           // Groups must be serialized with their original capitalization.
    310           generator.print(field.getMessageType().getName());
    311         } else {
    312           generator.print(field.getName());
    313         }
    314       }
    315 
    316       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
    317         if (singleLineMode) {
    318           generator.print(" { ");
    319         } else {
    320           generator.print(" {\n");
    321           generator.indent();
    322         }
    323       } else {
    324         generator.print(": ");
    325       }
    326 
    327       printFieldValue(field, value, generator);
    328 
    329       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
    330         if (singleLineMode) {
    331           generator.print("} ");
    332         } else {
    333           generator.outdent();
    334           generator.print("}\n");
    335         }
    336       } else {
    337         if (singleLineMode) {
    338           generator.print(" ");
    339         } else {
    340           generator.print("\n");
    341         }
    342       }
    343     }
    344 
    345     private void printFieldValue(final FieldDescriptor field,
    346                                  final Object value,
    347                                  final TextGenerator generator)
    348                                  throws IOException {
    349       switch (field.getType()) {
    350         case INT32:
    351         case SINT32:
    352         case SFIXED32:
    353           generator.print(((Integer) value).toString());
    354           break;
    355 
    356         case INT64:
    357         case SINT64:
    358         case SFIXED64:
    359           generator.print(((Long) value).toString());
    360           break;
    361 
    362         case BOOL:
    363           generator.print(((Boolean) value).toString());
    364           break;
    365 
    366         case FLOAT:
    367           generator.print(((Float) value).toString());
    368           break;
    369 
    370         case DOUBLE:
    371           generator.print(((Double) value).toString());
    372           break;
    373 
    374         case UINT32:
    375         case FIXED32:
    376           generator.print(unsignedToString((Integer) value));
    377           break;
    378 
    379         case UINT64:
    380         case FIXED64:
    381           generator.print(unsignedToString((Long) value));
    382           break;
    383 
    384         case STRING:
    385           generator.print("\"");
    386           generator.print(escapeNonAscii ?
    387               escapeText((String) value) :
    388               (String) value);
    389           generator.print("\"");
    390           break;
    391 
    392         case BYTES:
    393           generator.print("\"");
    394           generator.print(escapeBytes((ByteString) value));
    395           generator.print("\"");
    396           break;
    397 
    398         case ENUM:
    399           generator.print(((EnumValueDescriptor) value).getName());
    400           break;
    401 
    402         case MESSAGE:
    403         case GROUP:
    404           print((Message) value, generator);
    405           break;
    406       }
    407     }
    408 
    409     private void printUnknownFields(final UnknownFieldSet unknownFields,
    410                                     final TextGenerator generator)
    411                                     throws IOException {
    412       for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
    413                unknownFields.asMap().entrySet()) {
    414         final int number = entry.getKey();
    415         final UnknownFieldSet.Field field = entry.getValue();
    416         printUnknownField(number, WireFormat.WIRETYPE_VARINT,
    417             field.getVarintList(), generator);
    418         printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
    419             field.getFixed32List(), generator);
    420         printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
    421             field.getFixed64List(), generator);
    422         printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
    423             field.getLengthDelimitedList(), generator);
    424         for (final UnknownFieldSet value : field.getGroupList()) {
    425           generator.print(entry.getKey().toString());
    426           if (singleLineMode) {
    427             generator.print(" { ");
    428           } else {
    429             generator.print(" {\n");
    430             generator.indent();
    431           }
    432           printUnknownFields(value, generator);
    433           if (singleLineMode) {
    434             generator.print("} ");
    435           } else {
    436             generator.outdent();
    437             generator.print("}\n");
    438           }
    439         }
    440       }
    441     }
    442 
    443     private void printUnknownField(final int number,
    444                                    final int wireType,
    445                                    final List<?> values,
    446                                    final TextGenerator generator)
    447                                    throws IOException {
    448       for (final Object value : values) {
    449         generator.print(String.valueOf(number));
    450         generator.print(": ");
    451         printUnknownFieldValue(wireType, value, generator);
    452         generator.print(singleLineMode ? " " : "\n");
    453       }
    454     }
    455   }
    456 
    457   /** Convert an unsigned 32-bit integer to a string. */
    458   private static String unsignedToString(final int value) {
    459     if (value >= 0) {
    460       return Integer.toString(value);
    461     } else {
    462       return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
    463     }
    464   }
    465 
    466   /** Convert an unsigned 64-bit integer to a string. */
    467   private static String unsignedToString(final long value) {
    468     if (value >= 0) {
    469       return Long.toString(value);
    470     } else {
    471       // Pull off the most-significant bit so that BigInteger doesn't think
    472       // the number is negative, then set it again using setBit().
    473       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
    474                        .setBit(63).toString();
    475     }
    476   }
    477 
    478   /**
    479    * An inner class for writing text to the output stream.
    480    */
    481   private static final class TextGenerator {
    482     private final Appendable output;
    483     private final StringBuilder indent = new StringBuilder();
    484     private boolean atStartOfLine = true;
    485 
    486     private TextGenerator(final Appendable output) {
    487       this.output = output;
    488     }
    489 
    490     /**
    491      * Indent text by two spaces.  After calling Indent(), two spaces will be
    492      * inserted at the beginning of each line of text.  Indent() may be called
    493      * multiple times to produce deeper indents.
    494      */
    495     public void indent() {
    496       indent.append("  ");
    497     }
    498 
    499     /**
    500      * Reduces the current indent level by two spaces, or crashes if the indent
    501      * level is zero.
    502      */
    503     public void outdent() {
    504       final int length = indent.length();
    505       if (length == 0) {
    506         throw new IllegalArgumentException(
    507             " Outdent() without matching Indent().");
    508       }
    509       indent.delete(length - 2, length);
    510     }
    511 
    512     /**
    513      * Print text to the output stream.
    514      */
    515     public void print(final CharSequence text) throws IOException {
    516       final int size = text.length();
    517       int pos = 0;
    518 
    519       for (int i = 0; i < size; i++) {
    520         if (text.charAt(i) == '\n') {
    521           write(text.subSequence(pos, size), i - pos + 1);
    522           pos = i + 1;
    523           atStartOfLine = true;
    524         }
    525       }
    526       write(text.subSequence(pos, size), size - pos);
    527     }
    528 
    529     private void write(final CharSequence data, final int size)
    530                        throws IOException {
    531       if (size == 0) {
    532         return;
    533       }
    534       if (atStartOfLine) {
    535         atStartOfLine = false;
    536         output.append(indent);
    537       }
    538       output.append(data);
    539     }
    540   }
    541 
    542   // =================================================================
    543   // Parsing
    544 
    545   /**
    546    * Represents a stream of tokens parsed from a {@code String}.
    547    *
    548    * <p>The Java standard library provides many classes that you might think
    549    * would be useful for implementing this, but aren't.  For example:
    550    *
    551    * <ul>
    552    * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
    553    *   at least, something that would get us close to what we want -- except
    554    *   for one fatal flaw:  It automatically un-escapes strings using Java
    555    *   escape sequences, which do not include all the escape sequences we
    556    *   need to support (e.g. '\x').
    557    * <li>{@code java.util.Scanner}:  This seems like a great way at least to
    558    *   parse regular expressions out of a stream (so we wouldn't have to load
    559    *   the entire input into a single string before parsing).  Sadly,
    560    *   {@code Scanner} requires that tokens be delimited with some delimiter.
    561    *   Thus, although the text "foo:" should parse to two tokens ("foo" and
    562    *   ":"), {@code Scanner} would recognize it only as a single token.
    563    *   Furthermore, {@code Scanner} provides no way to inspect the contents
    564    *   of delimiters, making it impossible to keep track of line and column
    565    *   numbers.
    566    * </ul>
    567    *
    568    * <p>Luckily, Java's regular expression support does manage to be useful to
    569    * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
    570    * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
    571    * that we need to have the entire input in one contiguous string.
    572    */
    573   private static final class Tokenizer {
    // The complete input being tokenized.
    private final CharSequence text;
    // Matcher over this.text; its region start marks the first character
    // that has not yet been consumed (see nextToken() and skipWhitespace()).
    private final Matcher matcher;
    // The token currently being looked at; the empty string at end of input.
    private String currentToken;

    // The character index within this.text at which the current token begins.
    private int pos = 0;

    // The line and column numbers of the current token (both start at 0).
    private int line = 0;
    private int column = 0;

    // The line and column numbers of the previous token (allows throwing
    // errors *after* consuming).
    private int previousLine = 0;
    private int previousColumn = 0;

    // We use possessive quantifiers (*+ and ++) because otherwise the Java
    // regex matcher has stack overflows on large inputs.
    // WHITESPACE matches runs of whitespace and '#' comments, each comment
    // extending to the end of its line.
    private static final Pattern WHITESPACE =
      Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
    // TOKEN matches one multi-character token; any character that does not
    // start such a token is taken as a one-character token by nextToken().
    private static final Pattern TOKEN = Pattern.compile(
      "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
      "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
      "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
      "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
      Pattern.MULTILINE);

    // Case-insensitive spellings of infinity accepted when reading a double:
    // "inf" or "infinity", optionally preceded by '-'.
    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
      "-?inf(inity)?",
      Pattern.CASE_INSENSITIVE);
    // Same as DOUBLE_INFINITY but additionally allows a trailing 'f';
    // presumably used when reading a float -- usage is outside this excerpt.
    private static final Pattern FLOAT_INFINITY = Pattern.compile(
      "-?inf(inity)?f?",
      Pattern.CASE_INSENSITIVE);
    // "nan" with an optional trailing 'f', case-insensitive; presumably used
    // when reading a float -- usage is outside this excerpt.
    private static final Pattern FLOAT_NAN = Pattern.compile(
      "nanf?",
      Pattern.CASE_INSENSITIVE);
    610 
    /**
     * Construct a tokenizer that parses tokens from the given text.  Leading
     * whitespace and comments are skipped and the first token is loaded, so
     * the tokenizer is immediately positioned on a token (or at the end).
     */
    private Tokenizer(final CharSequence text) {
      // The matcher starts out with the whitespace pattern; nextToken() and
      // skipWhitespace() switch patterns as needed.
      this.matcher = WHITESPACE.matcher(text);
      this.text = text;

      skipWhitespace();
      nextToken();
    }
    618 
    619     /** Are we at the end of the input? */
    620     public boolean atEnd() {
    621       return currentToken.length() == 0;
    622     }
    623 
    624     /** Advance to the next token. */
    625     public void nextToken() {
    626       previousLine = line;
    627       previousColumn = column;
    628 
    629       // Advance the line counter to the current position.
    630       while (pos < matcher.regionStart()) {
    631         if (text.charAt(pos) == '\n') {
    632           ++line;
    633           column = 0;
    634         } else {
    635           ++column;
    636         }
    637         ++pos;
    638       }
    639 
    640       // Match the next token.
    641       if (matcher.regionStart() == matcher.regionEnd()) {
    642         // EOF
    643         currentToken = "";
    644       } else {
    645         matcher.usePattern(TOKEN);
    646         if (matcher.lookingAt()) {
    647           currentToken = matcher.group();
    648           matcher.region(matcher.end(), matcher.regionEnd());
    649         } else {
    650           // Take one character.
    651           currentToken = String.valueOf(text.charAt(pos));
    652           matcher.region(pos + 1, matcher.regionEnd());
    653         }
    654 
    655         skipWhitespace();
    656       }
    657     }
    658 
    659     /**
    660      * Skip over any whitespace so that the matcher region starts at the next
    661      * token.
    662      */
    663     private void skipWhitespace() {
    664       matcher.usePattern(WHITESPACE);
    665       if (matcher.lookingAt()) {
    666         matcher.region(matcher.end(), matcher.regionEnd());
    667       }
    668     }
    669 
    670     /**
    671      * If the next token exactly matches {@code token}, consume it and return
    672      * {@code true}.  Otherwise, return {@code false} without doing anything.
    673      */
    674     public boolean tryConsume(final String token) {
    675       if (currentToken.equals(token)) {
    676         nextToken();
    677         return true;
    678       } else {
    679         return false;
    680       }
    681     }
    682 
    683     /**
    684      * If the next token exactly matches {@code token}, consume it.  Otherwise,
    685      * throw a {@link ParseException}.
    686      */
    687     public void consume(final String token) throws ParseException {
    688       if (!tryConsume(token)) {
    689         throw parseException("Expected \"" + token + "\".");
    690       }
    691     }
    692 
    693     /**
    694      * Returns {@code true} if the next token is an integer, but does
    695      * not consume it.
    696      */
    697     public boolean lookingAtInteger() {
    698       if (currentToken.length() == 0) {
    699         return false;
    700       }
    701 
    702       final char c = currentToken.charAt(0);
    703       return ('0' <= c && c <= '9') ||
    704              c == '-' || c == '+';
    705     }
    706 
    707     /**
    708      * If the next token is an identifier, consume it and return its value.
    709      * Otherwise, throw a {@link ParseException}.
    710      */
    711     public String consumeIdentifier() throws ParseException {
    712       for (int i = 0; i < currentToken.length(); i++) {
    713         final char c = currentToken.charAt(i);
    714         if (('a' <= c && c <= 'z') ||
    715             ('A' <= c && c <= 'Z') ||
    716             ('0' <= c && c <= '9') ||
    717             (c == '_') || (c == '.')) {
    718           // OK
    719         } else {
    720           throw parseException("Expected identifier.");
    721         }
    722       }
    723 
    724       final String result = currentToken;
    725       nextToken();
    726       return result;
    727     }
    728 
    729     /**
    730      * If the next token is a 32-bit signed integer, consume it and return its
    731      * value.  Otherwise, throw a {@link ParseException}.
    732      */
    733     public int consumeInt32() throws ParseException {
    734       try {
    735         final int result = parseInt32(currentToken);
    736         nextToken();
    737         return result;
    738       } catch (NumberFormatException e) {
    739         throw integerParseException(e);
    740       }
    741     }
    742 
    743     /**
    744      * If the next token is a 32-bit unsigned integer, consume it and return its
    745      * value.  Otherwise, throw a {@link ParseException}.
    746      */
    747     public int consumeUInt32() throws ParseException {
    748       try {
    749         final int result = parseUInt32(currentToken);
    750         nextToken();
    751         return result;
    752       } catch (NumberFormatException e) {
    753         throw integerParseException(e);
    754       }
    755     }
    756 
    757     /**
    758      * If the next token is a 64-bit signed integer, consume it and return its
    759      * value.  Otherwise, throw a {@link ParseException}.
    760      */
    761     public long consumeInt64() throws ParseException {
    762       try {
    763         final long result = parseInt64(currentToken);
    764         nextToken();
    765         return result;
    766       } catch (NumberFormatException e) {
    767         throw integerParseException(e);
    768       }
    769     }
    770 
    771     /**
    772      * If the next token is a 64-bit unsigned integer, consume it and return its
    773      * value.  Otherwise, throw a {@link ParseException}.
    774      */
    775     public long consumeUInt64() throws ParseException {
    776       try {
    777         final long result = parseUInt64(currentToken);
    778         nextToken();
    779         return result;
    780       } catch (NumberFormatException e) {
    781         throw integerParseException(e);
    782       }
    783     }
    784 
    785     /**
    786      * If the next token is a double, consume it and return its value.
    787      * Otherwise, throw a {@link ParseException}.
    788      */
    789     public double consumeDouble() throws ParseException {
    790       // We need to parse infinity and nan separately because
    791       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
    792       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
    793         final boolean negative = currentToken.startsWith("-");
    794         nextToken();
    795         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
    796       }
    797       if (currentToken.equalsIgnoreCase("nan")) {
    798         nextToken();
    799         return Double.NaN;
    800       }
    801       try {
    802         final double result = Double.parseDouble(currentToken);
    803         nextToken();
    804         return result;
    805       } catch (NumberFormatException e) {
    806         throw floatParseException(e);
    807       }
    808     }
    809 
    810     /**
    811      * If the next token is a float, consume it and return its value.
    812      * Otherwise, throw a {@link ParseException}.
    813      */
    814     public float consumeFloat() throws ParseException {
    815       // We need to parse infinity and nan separately because
    816       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
    817       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
    818         final boolean negative = currentToken.startsWith("-");
    819         nextToken();
    820         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    821       }
    822       if (FLOAT_NAN.matcher(currentToken).matches()) {
    823         nextToken();
    824         return Float.NaN;
    825       }
    826       try {
    827         final float result = Float.parseFloat(currentToken);
    828         nextToken();
    829         return result;
    830       } catch (NumberFormatException e) {
    831         throw floatParseException(e);
    832       }
    833     }
    834 
    835     /**
    836      * If the next token is a boolean, consume it and return its value.
    837      * Otherwise, throw a {@link ParseException}.
    838      */
    839     public boolean consumeBoolean() throws ParseException {
    840       if (currentToken.equals("true") ||
    841           currentToken.equals("t") ||
    842           currentToken.equals("1")) {
    843         nextToken();
    844         return true;
    845       } else if (currentToken.equals("false") ||
    846                  currentToken.equals("f") ||
    847                  currentToken.equals("0")) {
    848         nextToken();
    849         return false;
    850       } else {
    851         throw parseException("Expected \"true\" or \"false\".");
    852       }
    853     }
    854 
    855     /**
    856      * If the next token is a string, consume it and return its (unescaped)
    857      * value.  Otherwise, throw a {@link ParseException}.
    858      */
    859     public String consumeString() throws ParseException {
    860       return consumeByteString().toStringUtf8();
    861     }
    862 
    863     /**
    864      * If the next token is a string, consume it, unescape it as a
    865      * {@link ByteString}, and return it.  Otherwise, throw a
    866      * {@link ParseException}.
    867      */
    868     public ByteString consumeByteString() throws ParseException {
    869       List<ByteString> list = new ArrayList<ByteString>();
    870       consumeByteString(list);
    871       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
    872         consumeByteString(list);
    873       }
    874       return ByteString.copyFrom(list);
    875     }
    876 
    877     /**
    878      * Like {@link #consumeByteString()} but adds each token of the string to
    879      * the given list.  String literals (whether bytes or text) may come in
    880      * multiple adjacent tokens which are automatically concatenated, like in
    881      * C or Python.
    882      */
    883     private void consumeByteString(List<ByteString> list) throws ParseException {
    884       final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
    885                                                    : '\0';
    886       if (quote != '\"' && quote != '\'') {
    887         throw parseException("Expected string.");
    888       }
    889 
    890       if (currentToken.length() < 2 ||
    891           currentToken.charAt(currentToken.length() - 1) != quote) {
    892         throw parseException("String missing ending quote.");
    893       }
    894 
    895       try {
    896         final String escaped =
    897             currentToken.substring(1, currentToken.length() - 1);
    898         final ByteString result = unescapeBytes(escaped);
    899         nextToken();
    900         list.add(result);
    901       } catch (InvalidEscapeSequenceException e) {
    902         throw parseException(e.getMessage());
    903       }
    904     }
    905 
    906     /**
    907      * Returns a {@link ParseException} with the current line and column
    908      * numbers in the description, suitable for throwing.
    909      */
    910     public ParseException parseException(final String description) {
    911       // Note:  People generally prefer one-based line and column numbers.
    912       return new ParseException(
    913         line + 1, column + 1, description);
    914     }
    915 
    916     /**
    917      * Returns a {@link ParseException} with the line and column numbers of
    918      * the previous token in the description, suitable for throwing.
    919      */
    920     public ParseException parseExceptionPreviousToken(
    921         final String description) {
    922       // Note:  People generally prefer one-based line and column numbers.
    923       return new ParseException(
    924         previousLine + 1, previousColumn + 1, description);
    925     }
    926 
    927     /**
    928      * Constructs an appropriate {@link ParseException} for the given
    929      * {@code NumberFormatException} when trying to parse an integer.
    930      */
    931     private ParseException integerParseException(
    932         final NumberFormatException e) {
    933       return parseException("Couldn't parse integer: " + e.getMessage());
    934     }
    935 
    936     /**
    937      * Constructs an appropriate {@link ParseException} for the given
    938      * {@code NumberFormatException} when trying to parse a float or double.
    939      */
    940     private ParseException floatParseException(final NumberFormatException e) {
    941       return parseException("Couldn't parse number: " + e.getMessage());
    942     }
    943   }
    944 
    945   /** Thrown when parsing an invalid text format message. */
    946   public static class ParseException extends IOException {
    947     private static final long serialVersionUID = 3196188060225107702L;
    948 
    949     private final int line;
    950     private final int column;
    951 
    952     /** Create a new instance, with -1 as the line and column numbers. */
    953     public ParseException(final String message) {
    954       this(-1, -1, message);
    955     }
    956 
    957     /**
    958      * Create a new instance
    959      *
    960      * @param line the line number where the parse error occurred,
    961      * using 1-offset.
    962      * @param column the column number where the parser error occurred,
    963      * using 1-offset.
    964      */
    965     public ParseException(final int line, final int column,
    966         final String message) {
    967       super(Integer.toString(line) + ":" + column + ": " + message);
    968       this.line = line;
    969       this.column = column;
    970     }
    971 
    972     /**
    973      * Return the line where the parse exception occurred, or -1 when
    974      * none is provided. The value is specified as 1-offset, so the first
    975      * line is line 1.
    976      */
    977     public int getLine() {
    978       return line;
    979     }
    980 
    981     /**
    982      * Return the column where the parse exception occurred, or -1 when
    983      * none is provided. The value is specified as 1-offset, so the first
    984      * line is line 1.
    985      */
    986     public int getColumn() {
    987       return column;
    988     }
    989   }
    990 
    991   /**
    992    * Parse a text-format message from {@code input} and merge the contents
    993    * into {@code builder}.
    994    */
    995   public static void merge(final Readable input,
    996                            final Message.Builder builder)
    997                            throws IOException {
    998     merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
    999   }
   1000 
   1001   /**
   1002    * Parse a text-format message from {@code input} and merge the contents
   1003    * into {@code builder}.
   1004    */
   1005   public static void merge(final CharSequence input,
   1006                            final Message.Builder builder)
   1007                            throws ParseException {
   1008     merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
   1009   }
   1010 
   1011   /**
   1012    * Parse a text-format message from {@code input} and merge the contents
   1013    * into {@code builder}.  Extensions will be recognized if they are
   1014    * registered in {@code extensionRegistry}.
   1015    */
   1016   public static void merge(final Readable input,
   1017                            final ExtensionRegistry extensionRegistry,
   1018                            final Message.Builder builder)
   1019                            throws IOException {
   1020     // Read the entire input to a String then parse that.
   1021 
   1022     // If StreamTokenizer were not quite so crippled, or if there were a kind
   1023     // of Reader that could read in chunks that match some particular regex,
   1024     // or if we wanted to write a custom Reader to tokenize our stream, then
   1025     // we would not have to read to one big String.  Alas, none of these is
   1026     // the case.  Oh well.
   1027 
   1028     merge(toStringBuilder(input), extensionRegistry, builder);
   1029   }
   1030 
   1031   private static final int BUFFER_SIZE = 4096;
   1032 
   1033   // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
   1034   // overhead is worthwhile
   1035   private static StringBuilder toStringBuilder(final Readable input)
   1036       throws IOException {
   1037     final StringBuilder text = new StringBuilder();
   1038     final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
   1039     while (true) {
   1040       final int n = input.read(buffer);
   1041       if (n == -1) {
   1042         break;
   1043       }
   1044       buffer.flip();
   1045       text.append(buffer, 0, n);
   1046     }
   1047     return text;
   1048   }
   1049 
   1050   /**
   1051    * Parse a text-format message from {@code input} and merge the contents
   1052    * into {@code builder}.  Extensions will be recognized if they are
   1053    * registered in {@code extensionRegistry}.
   1054    */
   1055   public static void merge(final CharSequence input,
   1056                            final ExtensionRegistry extensionRegistry,
   1057                            final Message.Builder builder)
   1058                            throws ParseException {
   1059     final Tokenizer tokenizer = new Tokenizer(input);
   1060 
   1061     while (!tokenizer.atEnd()) {
   1062       mergeField(tokenizer, extensionRegistry, builder);
   1063     }
   1064   }
   1065 
   1066   /**
   1067    * Parse a single field from {@code tokenizer} and merge it into
   1068    * {@code builder}.
   1069    */
   1070   private static void mergeField(final Tokenizer tokenizer,
   1071                                  final ExtensionRegistry extensionRegistry,
   1072                                  final Message.Builder builder)
   1073                                  throws ParseException {
   1074     FieldDescriptor field;
   1075     final Descriptor type = builder.getDescriptorForType();
   1076     ExtensionRegistry.ExtensionInfo extension = null;
   1077 
   1078     if (tokenizer.tryConsume("[")) {
   1079       // An extension.
   1080       final StringBuilder name =
   1081           new StringBuilder(tokenizer.consumeIdentifier());
   1082       while (tokenizer.tryConsume(".")) {
   1083         name.append('.');
   1084         name.append(tokenizer.consumeIdentifier());
   1085       }
   1086 
   1087       extension = extensionRegistry.findExtensionByName(name.toString());
   1088 
   1089       if (extension == null) {
   1090         throw tokenizer.parseExceptionPreviousToken(
   1091           "Extension \"" + name + "\" not found in the ExtensionRegistry.");
   1092       } else if (extension.descriptor.getContainingType() != type) {
   1093         throw tokenizer.parseExceptionPreviousToken(
   1094           "Extension \"" + name + "\" does not extend message type \"" +
   1095           type.getFullName() + "\".");
   1096       }
   1097 
   1098       tokenizer.consume("]");
   1099 
   1100       field = extension.descriptor;
   1101     } else {
   1102       final String name = tokenizer.consumeIdentifier();
   1103       field = type.findFieldByName(name);
   1104 
   1105       // Group names are expected to be capitalized as they appear in the
   1106       // .proto file, which actually matches their type names, not their field
   1107       // names.
   1108       if (field == null) {
   1109         // Explicitly specify US locale so that this code does not break when
   1110         // executing in Turkey.
   1111         final String lowerName = name.toLowerCase(Locale.US);
   1112         field = type.findFieldByName(lowerName);
   1113         // If the case-insensitive match worked but the field is NOT a group,
   1114         if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
   1115           field = null;
   1116         }
   1117       }
   1118       // Again, special-case group names as described above.
   1119       if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
   1120           !field.getMessageType().getName().equals(name)) {
   1121         field = null;
   1122       }
   1123 
   1124       if (field == null) {
   1125         throw tokenizer.parseExceptionPreviousToken(
   1126           "Message type \"" + type.getFullName() +
   1127           "\" has no field named \"" + name + "\".");
   1128       }
   1129     }
   1130 
   1131     Object value = null;
   1132 
   1133     if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
   1134       tokenizer.tryConsume(":");  // optional
   1135 
   1136       final String endToken;
   1137       if (tokenizer.tryConsume("<")) {
   1138         endToken = ">";
   1139       } else {
   1140         tokenizer.consume("{");
   1141         endToken = "}";
   1142       }
   1143 
   1144       final Message.Builder subBuilder;
   1145       if (extension == null) {
   1146         subBuilder = builder.newBuilderForField(field);
   1147       } else {
   1148         subBuilder = extension.defaultInstance.newBuilderForType();
   1149       }
   1150 
   1151       while (!tokenizer.tryConsume(endToken)) {
   1152         if (tokenizer.atEnd()) {
   1153           throw tokenizer.parseException(
   1154             "Expected \"" + endToken + "\".");
   1155         }
   1156         mergeField(tokenizer, extensionRegistry, subBuilder);
   1157       }
   1158 
   1159       value = subBuilder.buildPartial();
   1160 
   1161     } else {
   1162       tokenizer.consume(":");
   1163 
   1164       switch (field.getType()) {
   1165         case INT32:
   1166         case SINT32:
   1167         case SFIXED32:
   1168           value = tokenizer.consumeInt32();
   1169           break;
   1170 
   1171         case INT64:
   1172         case SINT64:
   1173         case SFIXED64:
   1174           value = tokenizer.consumeInt64();
   1175           break;
   1176 
   1177         case UINT32:
   1178         case FIXED32:
   1179           value = tokenizer.consumeUInt32();
   1180           break;
   1181 
   1182         case UINT64:
   1183         case FIXED64:
   1184           value = tokenizer.consumeUInt64();
   1185           break;
   1186 
   1187         case FLOAT:
   1188           value = tokenizer.consumeFloat();
   1189           break;
   1190 
   1191         case DOUBLE:
   1192           value = tokenizer.consumeDouble();
   1193           break;
   1194 
   1195         case BOOL:
   1196           value = tokenizer.consumeBoolean();
   1197           break;
   1198 
   1199         case STRING:
   1200           value = tokenizer.consumeString();
   1201           break;
   1202 
   1203         case BYTES:
   1204           value = tokenizer.consumeByteString();
   1205           break;
   1206 
   1207         case ENUM:
   1208           final EnumDescriptor enumType = field.getEnumType();
   1209 
   1210           if (tokenizer.lookingAtInteger()) {
   1211             final int number = tokenizer.consumeInt32();
   1212             value = enumType.findValueByNumber(number);
   1213             if (value == null) {
   1214               throw tokenizer.parseExceptionPreviousToken(
   1215                 "Enum type \"" + enumType.getFullName() +
   1216                 "\" has no value with number " + number + '.');
   1217             }
   1218           } else {
   1219             final String id = tokenizer.consumeIdentifier();
   1220             value = enumType.findValueByName(id);
   1221             if (value == null) {
   1222               throw tokenizer.parseExceptionPreviousToken(
   1223                 "Enum type \"" + enumType.getFullName() +
   1224                 "\" has no value named \"" + id + "\".");
   1225             }
   1226           }
   1227 
   1228           break;
   1229 
   1230         case MESSAGE:
   1231         case GROUP:
   1232           throw new RuntimeException("Can't get here.");
   1233       }
   1234     }
   1235 
   1236     if (field.isRepeated()) {
   1237       builder.addRepeatedField(field, value);
   1238     } else {
   1239       builder.setField(field, value);
   1240     }
   1241   }
   1242 
   1243   // =================================================================
   1244   // Utility functions
   1245   //
   1246   // Some of these methods are package-private because Descriptors.java uses
   1247   // them.
   1248 
   1249   /**
   1250    * Escapes bytes in the format used in protocol buffer text format, which
   1251    * is the same as the format used for C string literals.  All bytes
   1252    * that are not printable 7-bit ASCII characters are escaped, as well as
   1253    * backslash, single-quote, and double-quote characters.  Characters for
   1254    * which no defined short-hand escape sequence is defined will be escaped
   1255    * using 3-digit octal sequences.
   1256    */
   1257   static String escapeBytes(final ByteString input) {
   1258     final StringBuilder builder = new StringBuilder(input.size());
   1259     for (int i = 0; i < input.size(); i++) {
   1260       final byte b = input.byteAt(i);
   1261       switch (b) {
   1262         // Java does not recognize \a or \v, apparently.
   1263         case 0x07: builder.append("\\a" ); break;
   1264         case '\b': builder.append("\\b" ); break;
   1265         case '\f': builder.append("\\f" ); break;
   1266         case '\n': builder.append("\\n" ); break;
   1267         case '\r': builder.append("\\r" ); break;
   1268         case '\t': builder.append("\\t" ); break;
   1269         case 0x0b: builder.append("\\v" ); break;
   1270         case '\\': builder.append("\\\\"); break;
   1271         case '\'': builder.append("\\\'"); break;
   1272         case '"' : builder.append("\\\""); break;
   1273         default:
   1274           // Note:  Bytes with the high-order bit set should be escaped.  Since
   1275           //   bytes are signed, such bytes will compare less than 0x20, hence
   1276           //   the following line is correct.
   1277           if (b >= 0x20) {
   1278             builder.append((char) b);
   1279           } else {
   1280             builder.append('\\');
   1281             builder.append((char) ('0' + ((b >>> 6) & 3)));
   1282             builder.append((char) ('0' + ((b >>> 3) & 7)));
   1283             builder.append((char) ('0' + (b & 7)));
   1284           }
   1285           break;
   1286       }
   1287     }
   1288     return builder.toString();
   1289   }
   1290 
   1291   /**
   1292    * Un-escape a byte sequence as escaped using
   1293    * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
   1294    * "\x") are also recognized.
   1295    */
   1296   static ByteString unescapeBytes(final CharSequence charString)
   1297       throws InvalidEscapeSequenceException {
   1298     // First convert the Java character sequence to UTF-8 bytes.
   1299     ByteString input = ByteString.copyFromUtf8(charString.toString());
   1300     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
   1301     // escapes can all be expressed with ASCII characters, so it is safe to
   1302     // operate on bytes here.
   1303     //
   1304     // Unescaping the input byte array will result in a byte sequence that's no
   1305     // longer than the input.  That's because each escape sequence is between
   1306     // two and four bytes long and stands for a single byte.
   1307     final byte[] result = new byte[input.size()];
   1308     int pos = 0;
   1309     for (int i = 0; i < input.size(); i++) {
   1310       byte c = input.byteAt(i);
   1311       if (c == '\\') {
   1312         if (i + 1 < input.size()) {
   1313           ++i;
   1314           c = input.byteAt(i);
   1315           if (isOctal(c)) {
   1316             // Octal escape.
   1317             int code = digitValue(c);
   1318             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
   1319               ++i;
   1320               code = code * 8 + digitValue(input.byteAt(i));
   1321             }
   1322             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
   1323               ++i;
   1324               code = code * 8 + digitValue(input.byteAt(i));
   1325             }
   1326             // TODO: Check that 0 <= code && code <= 0xFF.
   1327             result[pos++] = (byte)code;
   1328           } else {
   1329             switch (c) {
   1330               case 'a' : result[pos++] = 0x07; break;
   1331               case 'b' : result[pos++] = '\b'; break;
   1332               case 'f' : result[pos++] = '\f'; break;
   1333               case 'n' : result[pos++] = '\n'; break;
   1334               case 'r' : result[pos++] = '\r'; break;
   1335               case 't' : result[pos++] = '\t'; break;
   1336               case 'v' : result[pos++] = 0x0b; break;
   1337               case '\\': result[pos++] = '\\'; break;
   1338               case '\'': result[pos++] = '\''; break;
   1339               case '"' : result[pos++] = '\"'; break;
   1340 
   1341               case 'x':
   1342                 // hex escape
   1343                 int code = 0;
   1344                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
   1345                   ++i;
   1346                   code = digitValue(input.byteAt(i));
   1347                 } else {
   1348                   throw new InvalidEscapeSequenceException(
   1349                       "Invalid escape sequence: '\\x' with no digits");
   1350                 }
   1351                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
   1352                   ++i;
   1353                   code = code * 16 + digitValue(input.byteAt(i));
   1354                 }
   1355                 result[pos++] = (byte)code;
   1356                 break;
   1357 
   1358               default:
   1359                 throw new InvalidEscapeSequenceException(
   1360                     "Invalid escape sequence: '\\" + (char)c + '\'');
   1361             }
   1362           }
   1363         } else {
   1364           throw new InvalidEscapeSequenceException(
   1365               "Invalid escape sequence: '\\' at end of string.");
   1366         }
   1367       } else {
   1368         result[pos++] = c;
   1369       }
   1370     }
   1371 
   1372     return ByteString.copyFrom(result, 0, pos);
   1373   }
   1374 
   1375   /**
   1376    * Thrown by {@link TextFormat#unescapeBytes} and
   1377    * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
   1378    */
   1379   static class InvalidEscapeSequenceException extends IOException {
   1380     private static final long serialVersionUID = -8164033650142593304L;
   1381 
   1382     InvalidEscapeSequenceException(final String description) {
   1383       super(description);
   1384     }
   1385   }
   1386 
   1387   /**
   1388    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
   1389    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
   1390    * individually as a 3-digit octal escape.  Yes, it's weird.
   1391    */
   1392   static String escapeText(final String input) {
   1393     return escapeBytes(ByteString.copyFromUtf8(input));
   1394   }
   1395 
   1396   /**
   1397    * Un-escape a text string as escaped using {@link #escapeText(String)}.
   1398    * Two-digit hex escapes (starting with "\x") are also recognized.
   1399    */
   1400   static String unescapeText(final String input)
   1401                              throws InvalidEscapeSequenceException {
   1402     return unescapeBytes(input).toStringUtf8();
   1403   }
   1404 
   1405   /** Is this an octal digit? */
   1406   private static boolean isOctal(final byte c) {
   1407     return '0' <= c && c <= '7';
   1408   }
   1409 
   1410   /** Is this a hex digit? */
   1411   private static boolean isHex(final byte c) {
   1412     return ('0' <= c && c <= '9') ||
   1413            ('a' <= c && c <= 'f') ||
   1414            ('A' <= c && c <= 'F');
   1415   }
   1416 
   1417   /**
   1418    * Interpret a character as a digit (in any base up to 36) and return the
   1419    * numeric value.  This is like {@code Character.digit()} but we don't accept
   1420    * non-ASCII digits.
   1421    */
   1422   private static int digitValue(final byte c) {
   1423     if ('0' <= c && c <= '9') {
   1424       return c - '0';
   1425     } else if ('a' <= c && c <= 'z') {
   1426       return c - 'a' + 10;
   1427     } else {
   1428       return c - 'A' + 10;
   1429     }
   1430   }
   1431 
   1432   /**
   1433    * Parse a 32-bit signed integer from the text.  Unlike the Java standard
   1434    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
   1435    * and "0" to signify hexadecimal and octal numbers, respectively.
   1436    */
   1437   static int parseInt32(final String text) throws NumberFormatException {
   1438     return (int) parseInteger(text, true, false);
   1439   }
   1440 
   1441   /**
   1442    * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
   1443    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
   1444    * and "0" to signify hexadecimal and octal numbers, respectively.  The
   1445    * result is coerced to a (signed) {@code int} when returned since Java has
   1446    * no unsigned integer type.
   1447    */
   1448   static int parseUInt32(final String text) throws NumberFormatException {
   1449     return (int) parseInteger(text, false, false);
   1450   }
   1451 
   1452   /**
   1453    * Parse a 64-bit signed integer from the text.  Unlike the Java standard
   1454    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
   1455    * and "0" to signify hexadecimal and octal numbers, respectively.
   1456    */
   1457   static long parseInt64(final String text) throws NumberFormatException {
   1458     return parseInteger(text, true, true);
   1459   }
   1460 
   1461   /**
   1462    * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
   1463    * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
   1464    * and "0" to signify hexadecimal and octal numbers, respectively.  The
   1465    * result is coerced to a (signed) {@code long} when returned since Java has
   1466    * no unsigned long type.
   1467    */
   1468   static long parseUInt64(final String text) throws NumberFormatException {
   1469     return parseInteger(text, false, true);
   1470   }
   1471 
   1472   private static long parseInteger(final String text,
   1473                                    final boolean isSigned,
   1474                                    final boolean isLong)
   1475                                    throws NumberFormatException {
   1476     int pos = 0;
   1477 
   1478     boolean negative = false;
   1479     if (text.startsWith("-", pos)) {
   1480       if (!isSigned) {
   1481         throw new NumberFormatException("Number must be positive: " + text);
   1482       }
   1483       ++pos;
   1484       negative = true;
   1485     }
   1486 
   1487     int radix = 10;
   1488     if (text.startsWith("0x", pos)) {
   1489       pos += 2;
   1490       radix = 16;
   1491     } else if (text.startsWith("0", pos)) {
   1492       radix = 8;
   1493     }
   1494 
   1495     final String numberText = text.substring(pos);
   1496 
   1497     long result = 0;
   1498     if (numberText.length() < 16) {
   1499       // Can safely assume no overflow.
   1500       result = Long.parseLong(numberText, radix);
   1501       if (negative) {
   1502         result = -result;
   1503       }
   1504 
   1505       // Check bounds.
   1506       // No need to check for 64-bit numbers since they'd have to be 16 chars
   1507       // or longer to overflow.
   1508       if (!isLong) {
   1509         if (isSigned) {
   1510           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
   1511             throw new NumberFormatException(
   1512               "Number out of range for 32-bit signed integer: " + text);
   1513           }
   1514         } else {
   1515           if (result >= (1L << 32) || result < 0) {
   1516             throw new NumberFormatException(
   1517               "Number out of range for 32-bit unsigned integer: " + text);
   1518           }
   1519         }
   1520       }
   1521     } else {
   1522       BigInteger bigValue = new BigInteger(numberText, radix);
   1523       if (negative) {
   1524         bigValue = bigValue.negate();
   1525       }
   1526 
   1527       // Check bounds.
   1528       if (!isLong) {
   1529         if (isSigned) {
   1530           if (bigValue.bitLength() > 31) {
   1531             throw new NumberFormatException(
   1532               "Number out of range for 32-bit signed integer: " + text);
   1533           }
   1534         } else {
   1535           if (bigValue.bitLength() > 32) {
   1536             throw new NumberFormatException(
   1537               "Number out of range for 32-bit unsigned integer: " + text);
   1538           }
   1539         }
   1540       } else {
   1541         if (isSigned) {
   1542           if (bigValue.bitLength() > 63) {
   1543             throw new NumberFormatException(
   1544               "Number out of range for 64-bit signed integer: " + text);
   1545           }
   1546         } else {
   1547           if (bigValue.bitLength() > 64) {
   1548             throw new NumberFormatException(
   1549               "Number out of range for 64-bit unsigned integer: " + text);
   1550           }
   1551         }
   1552       }
   1553 
   1554       result = bigValue.longValue();
   1555     }
   1556 
   1557     return result;
   1558   }
   1559 }
   1560