Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 
     35 #include <algorithm>
     36 #include <float.h>
     37 #include <math.h>
     38 #include <stdio.h>
     39 #include <stack>
     40 #include <limits>
     41 #include <vector>
     42 
     43 #include <google/protobuf/text_format.h>
     44 
     45 #include <google/protobuf/descriptor.h>
     46 #include <google/protobuf/wire_format_lite.h>
     47 #include <google/protobuf/io/coded_stream.h>
     48 #include <google/protobuf/io/zero_copy_stream.h>
     49 #include <google/protobuf/io/zero_copy_stream_impl.h>
     50 #include <google/protobuf/unknown_field_set.h>
     51 #include <google/protobuf/descriptor.pb.h>
     52 #include <google/protobuf/io/tokenizer.h>
     53 #include <google/protobuf/stubs/strutil.h>
     54 #include <google/protobuf/stubs/map_util.h>
     55 #include <google/protobuf/stubs/stl_util.h>
     56 
     57 namespace google {
     58 namespace protobuf {
     59 
     60 namespace {
     61 
     62 inline bool IsHexNumber(const string& str) {
     63   return (str.length() >= 2 && str[0] == '0' &&
     64           (str[1] == 'x' || str[1] == 'X'));
     65 }
     66 
     67 inline bool IsOctNumber(const string& str) {
     68   return (str.length() >= 2 && str[0] == '0' &&
     69           (str[1] >= '0' && str[1] < '8'));
     70 }
     71 
     72 }  // namespace
     73 
     74 string Message::DebugString() const {
     75   string debug_string;
     76 
     77   TextFormat::PrintToString(*this, &debug_string);
     78 
     79   return debug_string;
     80 }
     81 
     82 string Message::ShortDebugString() const {
     83   string debug_string;
     84 
     85   TextFormat::Printer printer;
     86   printer.SetSingleLineMode(true);
     87 
     88   printer.PrintToString(*this, &debug_string);
     89   // Single line mode currently might have an extra space at the end.
     90   if (debug_string.size() > 0 &&
     91       debug_string[debug_string.size() - 1] == ' ') {
     92     debug_string.resize(debug_string.size() - 1);
     93   }
     94 
     95   return debug_string;
     96 }
     97 
     98 string Message::Utf8DebugString() const {
     99   string debug_string;
    100 
    101   TextFormat::Printer printer;
    102   printer.SetUseUtf8StringEscaping(true);
    103 
    104   printer.PrintToString(*this, &debug_string);
    105 
    106   return debug_string;
    107 }
    108 
    109 void Message::PrintDebugString() const {
    110   printf("%s", DebugString().c_str());
    111 }
    112 
    113 
    114 // ===========================================================================
    115 // Implementation of the parse information tree class.
    116 TextFormat::ParseInfoTree::ParseInfoTree() { }
    117 
    118 TextFormat::ParseInfoTree::~ParseInfoTree() {
    119   // Remove any nested information trees, as they are owned by this tree.
    120   for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
    121     STLDeleteElements(&(it->second));
    122   }
    123 }
    124 
    125 void TextFormat::ParseInfoTree::RecordLocation(
    126     const FieldDescriptor* field,
    127     TextFormat::ParseLocation location) {
    128   locations_[field].push_back(location);
    129 }
    130 
    131 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
    132     const FieldDescriptor* field) {
    133   // Owned by us in the map.
    134   TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
    135   vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
    136   GOOGLE_CHECK(trees);
    137   trees->push_back(instance);
    138   return instance;
    139 }
    140 
    141 void CheckFieldIndex(const FieldDescriptor* field, int index) {
    142   if (field == NULL) { return; }
    143 
    144   if (field->is_repeated() && index == -1) {
    145     GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
    146                 << "Field: " << field->name();
    147   } else if (!field->is_repeated() && index != -1) {
    148     GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
    149                 << "Field: " << field->name();
    150   }
    151 }
    152 
    153 TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
    154     const FieldDescriptor* field, int index) const {
    155   CheckFieldIndex(field, index);
    156   if (index == -1) { index = 0; }
    157 
    158   const vector<TextFormat::ParseLocation>* locations =
    159       FindOrNull(locations_, field);
    160   if (locations == NULL || index >= locations->size()) {
    161     return TextFormat::ParseLocation();
    162   }
    163 
    164   return (*locations)[index];
    165 }
    166 
    167 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
    168     const FieldDescriptor* field, int index) const {
    169   CheckFieldIndex(field, index);
    170   if (index == -1) { index = 0; }
    171 
    172   const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
    173   if (trees == NULL || index >= trees->size()) {
    174     return NULL;
    175   }
    176 
    177   return (*trees)[index];
    178 }
    179 
    180 
    181 // ===========================================================================
    182 // Internal class for parsing an ASCII representation of a Protocol Message.
    183 // This class makes use of the Protocol Message compiler's tokenizer found
    184 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
    185 // method is *not* thread-safe and should only be used in a single thread at
    186 // a time.
    187 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// The macro expands to an if/else whose else branch is "return false", so it
// can only be used inside functions whose return type accepts false. The
// empty "{}" then-branch keeps a following "else" in user code from binding
// to the macro's own "if".
#define DO(STATEMENT) if (STATEMENT) {} else return false
    192 
    193 class TextFormat::Parser::ParserImpl {
    194  public:
    195 
    196   // Determines if repeated values for non-repeated fields and
    197   // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
    198   // required/optional field named "foo", or "baz: 1 qux: 2"
    199   // where "baz" and "qux" are members of the same oneof.
    200   enum SingularOverwritePolicy {
    201     ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
    202     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
    203   };
    204 
    205   ParserImpl(const Descriptor* root_message_type,
    206              io::ZeroCopyInputStream* input_stream,
    207              io::ErrorCollector* error_collector,
    208              TextFormat::Finder* finder,
    209              ParseInfoTree* parse_info_tree,
    210              SingularOverwritePolicy singular_overwrite_policy,
    211              bool allow_case_insensitive_field,
    212              bool allow_unknown_field,
    213              bool allow_unknown_enum,
    214              bool allow_field_number,
    215              bool allow_relaxed_whitespace)
    216     : error_collector_(error_collector),
    217       finder_(finder),
    218       parse_info_tree_(parse_info_tree),
    219       tokenizer_error_collector_(this),
    220       tokenizer_(input_stream, &tokenizer_error_collector_),
    221       root_message_type_(root_message_type),
    222       singular_overwrite_policy_(singular_overwrite_policy),
    223       allow_case_insensitive_field_(allow_case_insensitive_field),
    224       allow_unknown_field_(allow_unknown_field),
    225       allow_unknown_enum_(allow_unknown_enum),
    226       allow_field_number_(allow_field_number),
    227       had_errors_(false) {
    228     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
    229     // for floats.
    230     tokenizer_.set_allow_f_after_float(true);
    231 
    232     // '#' starts a comment.
    233     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
    234 
    235     if (allow_relaxed_whitespace) {
    236       tokenizer_.set_require_space_after_number(false);
    237       tokenizer_.set_allow_multiline_strings(true);
    238     }
    239 
    240     // Consume the starting token.
    241     tokenizer_.Next();
    242   }
    243   ~ParserImpl() { }
    244 
    245   // Parses the ASCII representation specified in input and saves the
    246   // information into the output pointer (a Message). Returns
    247   // false if an error occurs (an error will also be logged to
    248   // GOOGLE_LOG(ERROR)).
    249   bool Parse(Message* output) {
    250     // Consume fields until we cannot do so anymore.
    251     while (true) {
    252       if (LookingAtType(io::Tokenizer::TYPE_END)) {
    253         return !had_errors_;
    254       }
    255 
    256       DO(ConsumeField(output));
    257     }
    258   }
    259 
    260   bool ParseField(const FieldDescriptor* field, Message* output) {
    261     bool suc;
    262     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    263       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
    264     } else {
    265       suc = ConsumeFieldValue(output, output->GetReflection(), field);
    266     }
    267     return suc && LookingAtType(io::Tokenizer::TYPE_END);
    268   }
    269 
    270   void ReportError(int line, int col, const string& message) {
    271     had_errors_ = true;
    272     if (error_collector_ == NULL) {
    273       if (line >= 0) {
    274         GOOGLE_LOG(ERROR) << "Error parsing text-format "
    275                    << root_message_type_->full_name()
    276                    << ": " << (line + 1) << ":"
    277                    << (col + 1) << ": " << message;
    278       } else {
    279         GOOGLE_LOG(ERROR) << "Error parsing text-format "
    280                    << root_message_type_->full_name()
    281                    << ": " << message;
    282       }
    283     } else {
    284       error_collector_->AddError(line, col, message);
    285     }
    286   }
    287 
    288   void ReportWarning(int line, int col, const string& message) {
    289     if (error_collector_ == NULL) {
    290       if (line >= 0) {
    291         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
    292                      << root_message_type_->full_name()
    293                      << ": " << (line + 1) << ":"
    294                      << (col + 1) << ": " << message;
    295       } else {
    296         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
    297                      << root_message_type_->full_name()
    298                      << ": " << message;
    299       }
    300     } else {
    301       error_collector_->AddWarning(line, col, message);
    302     }
    303   }
    304 
    305  private:
    306   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
    307 
    308   // Reports an error with the given message with information indicating
    309   // the position (as derived from the current token).
    310   void ReportError(const string& message) {
    311     ReportError(tokenizer_.current().line, tokenizer_.current().column,
    312                 message);
    313   }
    314 
    315   // Reports a warning with the given message with information indicating
    316   // the position (as derived from the current token).
    317   void ReportWarning(const string& message) {
    318     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
    319                   message);
    320   }
    321 
    322   // Consumes the specified message with the given starting delimeter.
    323   // This method checks to see that the end delimeter at the conclusion of
    324   // the consumption matches the starting delimeter passed in here.
    325   bool ConsumeMessage(Message* message, const string delimeter) {
    326     while (!LookingAt(">") &&  !LookingAt("}")) {
    327       DO(ConsumeField(message));
    328     }
    329 
    330     // Confirm that we have a valid ending delimeter.
    331     DO(Consume(delimeter));
    332 
    333     return true;
    334   }
    335 
    336 
    337   // Consumes the current field (as returned by the tokenizer) on the
    338   // passed in message.
    339   bool ConsumeField(Message* message) {
    340     const Reflection* reflection = message->GetReflection();
    341     const Descriptor* descriptor = message->GetDescriptor();
    342 
    343     string field_name;
    344 
    345     const FieldDescriptor* field = NULL;
    346     int start_line = tokenizer_.current().line;
    347     int start_column = tokenizer_.current().column;
    348 
    349     if (TryConsume("[")) {
    350       // Extension.
    351       DO(ConsumeIdentifier(&field_name));
    352       while (TryConsume(".")) {
    353         string part;
    354         DO(ConsumeIdentifier(&part));
    355         field_name += ".";
    356         field_name += part;
    357       }
    358       DO(Consume("]"));
    359 
    360       field = (finder_ != NULL
    361                ? finder_->FindExtension(message, field_name)
    362                : reflection->FindKnownExtensionByName(field_name));
    363 
    364       if (field == NULL) {
    365         if (!allow_unknown_field_) {
    366           ReportError("Extension \"" + field_name + "\" is not defined or "
    367                       "is not an extension of \"" +
    368                       descriptor->full_name() + "\".");
    369           return false;
    370         } else {
    371           ReportWarning("Extension \"" + field_name + "\" is not defined or "
    372                         "is not an extension of \"" +
    373                         descriptor->full_name() + "\".");
    374         }
    375       }
    376     } else {
    377       DO(ConsumeIdentifier(&field_name));
    378 
    379       int32 field_number;
    380       if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
    381         if (descriptor->IsExtensionNumber(field_number)) {
    382           field = reflection->FindKnownExtensionByNumber(field_number);
    383         } else {
    384           field = descriptor->FindFieldByNumber(field_number);
    385         }
    386       } else {
    387         field = descriptor->FindFieldByName(field_name);
    388         // Group names are expected to be capitalized as they appear in the
    389         // .proto file, which actually matches their type names, not their
    390         // field names.
    391         if (field == NULL) {
    392           string lower_field_name = field_name;
    393           LowerString(&lower_field_name);
    394           field = descriptor->FindFieldByName(lower_field_name);
    395           // If the case-insensitive match worked but the field is NOT a group,
    396           if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
    397             field = NULL;
    398           }
    399         }
    400         // Again, special-case group names as described above.
    401         if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
    402             && field->message_type()->name() != field_name) {
    403           field = NULL;
    404         }
    405 
    406         if (field == NULL && allow_case_insensitive_field_) {
    407           string lower_field_name = field_name;
    408           LowerString(&lower_field_name);
    409           field = descriptor->FindFieldByLowercaseName(lower_field_name);
    410         }
    411       }
    412 
    413       if (field == NULL) {
    414         if (!allow_unknown_field_) {
    415           ReportError("Message type \"" + descriptor->full_name() +
    416                       "\" has no field named \"" + field_name + "\".");
    417           return false;
    418         } else {
    419           ReportWarning("Message type \"" + descriptor->full_name() +
    420                         "\" has no field named \"" + field_name + "\".");
    421         }
    422       }
    423     }
    424 
    425     // Skips unknown field.
    426     if (field == NULL) {
    427       GOOGLE_CHECK(allow_unknown_field_);
    428       // Try to guess the type of this field.
    429       // If this field is not a message, there should be a ":" between the
    430       // field name and the field value and also the field value should not
    431       // start with "{" or "<" which indicates the begining of a message body.
    432       // If there is no ":" or there is a "{" or "<" after ":", this field has
    433       // to be a message or the input is ill-formed.
    434       if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
    435         return SkipFieldValue();
    436       } else {
    437         return SkipFieldMessage();
    438       }
    439     }
    440 
    441     if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
    442       // Fail if the field is not repeated and it has already been specified.
    443       if (!field->is_repeated() && reflection->HasField(*message, field)) {
    444         ReportError("Non-repeated field \"" + field_name +
    445                     "\" is specified multiple times.");
    446         return false;
    447       }
    448       // Fail if the field is a member of a oneof and another member has already
    449       // been specified.
    450       const OneofDescriptor* oneof = field->containing_oneof();
    451       if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
    452         const FieldDescriptor* other_field =
    453             reflection->GetOneofFieldDescriptor(*message, oneof);
    454         ReportError("Field \"" + field_name + "\" is specified along with "
    455                     "field \"" + other_field->name() + "\", another member "
    456                     "of oneof \"" + oneof->name() + "\".");
    457         return false;
    458       }
    459     }
    460 
    461     // Perform special handling for embedded message types.
    462     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    463       // ':' is optional here.
    464       TryConsume(":");
    465     } else {
    466       // ':' is required here.
    467       DO(Consume(":"));
    468     }
    469 
    470     if (field->is_repeated() && TryConsume("[")) {
    471       // Short repeated format, e.g.  "foo: [1, 2, 3]"
    472       while (true) {
    473         if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    474           // Perform special handling for embedded message types.
    475           DO(ConsumeFieldMessage(message, reflection, field));
    476         } else {
    477           DO(ConsumeFieldValue(message, reflection, field));
    478         }
    479         if (TryConsume("]")) {
    480           break;
    481         }
    482         DO(Consume(","));
    483       }
    484     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    485       DO(ConsumeFieldMessage(message, reflection, field));
    486     } else {
    487       DO(ConsumeFieldValue(message, reflection, field));
    488     }
    489 
    490     // For historical reasons, fields may optionally be separated by commas or
    491     // semicolons.
    492     TryConsume(";") || TryConsume(",");
    493 
    494     if (field->options().deprecated()) {
    495       ReportWarning("text format contains deprecated field \""
    496                     + field_name + "\"");
    497     }
    498 
    499     // If a parse info tree exists, add the location for the parsed
    500     // field.
    501     if (parse_info_tree_ != NULL) {
    502       RecordLocation(parse_info_tree_, field,
    503                      ParseLocation(start_line, start_column));
    504     }
    505 
    506     return true;
    507   }
    508 
    509   // Skips the next field including the field's name and value.
    510   bool SkipField() {
    511     string field_name;
    512     if (TryConsume("[")) {
    513       // Extension name.
    514       DO(ConsumeIdentifier(&field_name));
    515       while (TryConsume(".")) {
    516         string part;
    517         DO(ConsumeIdentifier(&part));
    518         field_name += ".";
    519         field_name += part;
    520       }
    521       DO(Consume("]"));
    522     } else {
    523       DO(ConsumeIdentifier(&field_name));
    524     }
    525 
    526     // Try to guess the type of this field.
    527     // If this field is not a message, there should be a ":" between the
    528     // field name and the field value and also the field value should not
    529     // start with "{" or "<" which indicates the begining of a message body.
    530     // If there is no ":" or there is a "{" or "<" after ":", this field has
    531     // to be a message or the input is ill-formed.
    532     if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
    533       DO(SkipFieldValue());
    534     } else {
    535       DO(SkipFieldMessage());
    536     }
    537     // For historical reasons, fields may optionally be separated by commas or
    538     // semicolons.
    539     TryConsume(";") || TryConsume(",");
    540     return true;
    541   }
    542 
    543   bool ConsumeFieldMessage(Message* message,
    544                            const Reflection* reflection,
    545                            const FieldDescriptor* field) {
    546 
    547     // If the parse information tree is not NULL, create a nested one
    548     // for the nested message.
    549     ParseInfoTree* parent = parse_info_tree_;
    550     if (parent != NULL) {
    551       parse_info_tree_ = CreateNested(parent, field);
    552     }
    553 
    554     string delimeter;
    555     if (TryConsume("<")) {
    556       delimeter = ">";
    557     } else {
    558       DO(Consume("{"));
    559       delimeter = "}";
    560     }
    561 
    562     if (field->is_repeated()) {
    563       DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
    564     } else {
    565       DO(ConsumeMessage(reflection->MutableMessage(message, field),
    566                         delimeter));
    567     }
    568 
    569     // Reset the parse information tree.
    570     parse_info_tree_ = parent;
    571     return true;
    572   }
    573 
    574   // Skips the whole body of a message including the begining delimeter and
    575   // the ending delimeter.
    576   bool SkipFieldMessage() {
    577     string delimeter;
    578     if (TryConsume("<")) {
    579       delimeter = ">";
    580     } else {
    581       DO(Consume("{"));
    582       delimeter = "}";
    583     }
    584     while (!LookingAt(">") &&  !LookingAt("}")) {
    585       DO(SkipField());
    586     }
    587     DO(Consume(delimeter));
    588     return true;
    589   }
    590 
    591   bool ConsumeFieldValue(Message* message,
    592                          const Reflection* reflection,
    593                          const FieldDescriptor* field) {
    594 
    595 // Define an easy to use macro for setting fields. This macro checks
    596 // to see if the field is repeated (in which case we need to use the Add
    597 // methods or not (in which case we need to use the Set methods).
    598 #define SET_FIELD(CPPTYPE, VALUE)                                  \
    599         if (field->is_repeated()) {                                \
    600           reflection->Add##CPPTYPE(message, field, VALUE);         \
    601         } else {                                                   \
    602           reflection->Set##CPPTYPE(message, field, VALUE);         \
    603         }                                                          \
    604 
    605     switch(field->cpp_type()) {
    606       case FieldDescriptor::CPPTYPE_INT32: {
    607         int64 value;
    608         DO(ConsumeSignedInteger(&value, kint32max));
    609         SET_FIELD(Int32, static_cast<int32>(value));
    610         break;
    611       }
    612 
    613       case FieldDescriptor::CPPTYPE_UINT32: {
    614         uint64 value;
    615         DO(ConsumeUnsignedInteger(&value, kuint32max));
    616         SET_FIELD(UInt32, static_cast<uint32>(value));
    617         break;
    618       }
    619 
    620       case FieldDescriptor::CPPTYPE_INT64: {
    621         int64 value;
    622         DO(ConsumeSignedInteger(&value, kint64max));
    623         SET_FIELD(Int64, value);
    624         break;
    625       }
    626 
    627       case FieldDescriptor::CPPTYPE_UINT64: {
    628         uint64 value;
    629         DO(ConsumeUnsignedInteger(&value, kuint64max));
    630         SET_FIELD(UInt64, value);
    631         break;
    632       }
    633 
    634       case FieldDescriptor::CPPTYPE_FLOAT: {
    635         double value;
    636         DO(ConsumeDouble(&value));
    637         SET_FIELD(Float, static_cast<float>(value));
    638         break;
    639       }
    640 
    641       case FieldDescriptor::CPPTYPE_DOUBLE: {
    642         double value;
    643         DO(ConsumeDouble(&value));
    644         SET_FIELD(Double, value);
    645         break;
    646       }
    647 
    648       case FieldDescriptor::CPPTYPE_STRING: {
    649         string value;
    650         DO(ConsumeString(&value));
    651         SET_FIELD(String, value);
    652         break;
    653       }
    654 
    655       case FieldDescriptor::CPPTYPE_BOOL: {
    656         if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    657           uint64 value;
    658           DO(ConsumeUnsignedInteger(&value, 1));
    659           SET_FIELD(Bool, value);
    660         } else {
    661           string value;
    662           DO(ConsumeIdentifier(&value));
    663           if (value == "true" || value == "True" || value == "t") {
    664             SET_FIELD(Bool, true);
    665           } else if (value == "false" || value == "False" || value == "f") {
    666             SET_FIELD(Bool, false);
    667           } else {
    668             ReportError("Invalid value for boolean field \"" + field->name()
    669                         + "\". Value: \"" + value  + "\".");
    670             return false;
    671           }
    672         }
    673         break;
    674       }
    675 
    676       case FieldDescriptor::CPPTYPE_ENUM: {
    677         string value;
    678         const EnumDescriptor* enum_type = field->enum_type();
    679         const EnumValueDescriptor* enum_value = NULL;
    680 
    681         if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    682           DO(ConsumeIdentifier(&value));
    683           // Find the enumeration value.
    684           enum_value = enum_type->FindValueByName(value);
    685 
    686         } else if (LookingAt("-") ||
    687                    LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    688           int64 int_value;
    689           DO(ConsumeSignedInteger(&int_value, kint32max));
    690           value = SimpleItoa(int_value);        // for error reporting
    691           enum_value = enum_type->FindValueByNumber(int_value);
    692         } else {
    693           ReportError("Expected integer or identifier.");
    694           return false;
    695         }
    696 
    697         if (enum_value == NULL) {
    698           if (!allow_unknown_enum_) {
    699             ReportError("Unknown enumeration value of \"" + value  + "\" for "
    700                         "field \"" + field->name() + "\".");
    701             return false;
    702           } else {
    703             ReportWarning("Unknown enumeration value of \"" + value  + "\" for "
    704                           "field \"" + field->name() + "\".");
    705             return true;
    706           }
    707         }
    708 
    709         SET_FIELD(Enum, enum_value);
    710         break;
    711       }
    712 
    713       case FieldDescriptor::CPPTYPE_MESSAGE: {
    714         // We should never get here. Put here instead of a default
    715         // so that if new types are added, we get a nice compiler warning.
    716         GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
    717         break;
    718       }
    719     }
    720 #undef SET_FIELD
    721     return true;
    722   }
    723 
    724   bool SkipFieldValue() {
    725     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    726       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    727         tokenizer_.Next();
    728       }
    729       return true;
    730     }
    731     // Possible field values other than string:
    732     //   12345        => TYPE_INTEGER
    733     //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
    734     //   1.2345       => TYPE_FLOAT
    735     //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
    736     //   inf          => TYPE_IDENTIFIER
    737     //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
    738     //   TYPE_INTEGER => TYPE_IDENTIFIER
    739     // Divides them into two group, one with TYPE_SYMBOL
    740     // and the other without:
    741     //   Group one:
    742     //     12345        => TYPE_INTEGER
    743     //     1.2345       => TYPE_FLOAT
    744     //     inf          => TYPE_IDENTIFIER
    745     //     TYPE_INTEGER => TYPE_IDENTIFIER
    746     //   Group two:
    747     //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
    748     //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
    749     //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
    750     // As we can see, the field value consists of an optional '-' and one of
    751     // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
    752     bool has_minus = TryConsume("-");
    753     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
    754         !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
    755         !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    756       return false;
    757     }
    758     // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
    759     // value while other combinations all generate valid values.
    760     // We check if the value of this combination is valid here.
    761     // TYPE_IDENTIFIER after a '-' should be one of the float values listed
    762     // below:
    763     //   inf, inff, infinity, nan
    764     if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    765       string text = tokenizer_.current().text;
    766       LowerString(&text);
    767       if (text != "inf" &&
    768           text != "infinity" &&
    769           text != "nan") {
    770         ReportError("Invalid float number: " + text);
    771         return false;
    772       }
    773     }
    774     tokenizer_.Next();
    775     return true;
    776   }
    777 
    778   // Returns true if the current token's text is equal to that specified.
    779   bool LookingAt(const string& text) {
    780     return tokenizer_.current().text == text;
    781   }
    782 
    783   // Returns true if the current token's type is equal to that specified.
    784   bool LookingAtType(io::Tokenizer::TokenType token_type) {
    785     return tokenizer_.current().type == token_type;
    786   }
    787 
    788   // Consumes an identifier and saves its value in the identifier parameter.
    789   // Returns false if the token is not of type IDENTFIER.
    790   bool ConsumeIdentifier(string* identifier) {
    791     if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    792       *identifier = tokenizer_.current().text;
    793       tokenizer_.Next();
    794       return true;
    795     }
    796 
    797     // If allow_field_numer_ or allow_unknown_field_ is true, we should able
    798     // to parse integer identifiers.
    799     if ((allow_field_number_ || allow_unknown_field_)
    800         && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    801       *identifier = tokenizer_.current().text;
    802       tokenizer_.Next();
    803       return true;
    804     }
    805 
    806     ReportError("Expected identifier.");
    807     return false;
    808   }
    809 
    810   // Consumes a string and saves its value in the text parameter.
    811   // Returns false if the token is not of type STRING.
    812   bool ConsumeString(string* text) {
    813     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
    814       ReportError("Expected string.");
    815       return false;
    816     }
    817 
    818     text->clear();
    819     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    820       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
    821 
    822       tokenizer_.Next();
    823     }
    824 
    825     return true;
    826   }
    827 
    828   // Consumes a uint64 and saves its value in the value parameter.
    829   // Returns false if the token is not of type INTEGER.
    830   bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
    831     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    832       ReportError("Expected integer.");
    833       return false;
    834     }
    835 
    836     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
    837                                      max_value, value)) {
    838       ReportError("Integer out of range.");
    839       return false;
    840     }
    841 
    842     tokenizer_.Next();
    843     return true;
    844   }
    845 
    846   // Consumes an int64 and saves its value in the value parameter.
    847   // Note that since the tokenizer does not support negative numbers,
    848   // we actually may consume an additional token (for the minus sign) in this
    849   // method. Returns false if the token is not an integer
    850   // (signed or otherwise).
    851   bool ConsumeSignedInteger(int64* value, uint64 max_value) {
    852     bool negative = false;
    853 
    854     if (TryConsume("-")) {
    855       negative = true;
    856       // Two's complement always allows one more negative integer than
    857       // positive.
    858       ++max_value;
    859     }
    860 
    861     uint64 unsigned_value;
    862 
    863     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
    864 
    865     *value = static_cast<int64>(unsigned_value);
    866 
    867     if (negative) {
    868       *value = -*value;
    869     }
    870 
    871     return true;
    872   }
    873 
    874   // Consumes a uint64 and saves its value in the value parameter.
    875   // Accepts decimal numbers only, rejects hex or oct numbers.
    876   bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
    877     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    878       ReportError("Expected integer.");
    879       return false;
    880     }
    881 
    882     const string& text = tokenizer_.current().text;
    883     if (IsHexNumber(text) || IsOctNumber(text)) {
    884       ReportError("Expect a decimal number.");
    885       return false;
    886     }
    887 
    888     if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
    889       ReportError("Integer out of range.");
    890       return false;
    891     }
    892 
    893     tokenizer_.Next();
    894     return true;
    895   }
    896 
    897   // Consumes a double and saves its value in the value parameter.
    898   // Note that since the tokenizer does not support negative numbers,
    899   // we actually may consume an additional token (for the minus sign) in this
    900   // method. Returns false if the token is not a double
    901   // (signed or otherwise).
    902   bool ConsumeDouble(double* value) {
    903     bool negative = false;
    904 
    905     if (TryConsume("-")) {
    906       negative = true;
    907     }
    908 
    909     // A double can actually be an integer, according to the tokenizer.
    910     // Therefore, we must check both cases here.
    911     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    912       // We have found an integer value for the double.
    913       uint64 integer_value;
    914       DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
    915 
    916       *value = static_cast<double>(integer_value);
    917     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
    918       // We have found a float value for the double.
    919       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
    920 
    921       // Mark the current token as consumed.
    922       tokenizer_.Next();
    923     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    924       string text = tokenizer_.current().text;
    925       LowerString(&text);
    926       if (text == "inf" ||
    927           text == "infinity") {
    928         *value = std::numeric_limits<double>::infinity();
    929         tokenizer_.Next();
    930       } else if (text == "nan") {
    931         *value = std::numeric_limits<double>::quiet_NaN();
    932         tokenizer_.Next();
    933       } else {
    934         ReportError("Expected double.");
    935         return false;
    936       }
    937     } else {
    938       ReportError("Expected double.");
    939       return false;
    940     }
    941 
    942     if (negative) {
    943       *value = -*value;
    944     }
    945 
    946     return true;
    947   }
    948 
    949   // Consumes a token and confirms that it matches that specified in the
    950   // value parameter. Returns false if the token found does not match that
    951   // which was specified.
    952   bool Consume(const string& value) {
    953     const string& current_value = tokenizer_.current().text;
    954 
    955     if (current_value != value) {
    956       ReportError("Expected \"" + value + "\", found \"" + current_value
    957                   + "\".");
    958       return false;
    959     }
    960 
    961     tokenizer_.Next();
    962 
    963     return true;
    964   }
    965 
    966   // Attempts to consume the supplied value. Returns false if a the
    967   // token found does not match the value specified.
    968   bool TryConsume(const string& value) {
    969     if (tokenizer_.current().text == value) {
    970       tokenizer_.Next();
    971       return true;
    972     } else {
    973       return false;
    974     }
    975   }
    976 
    977   // An internal instance of the Tokenizer's error collector, used to
    978   // collect any base-level parse errors and feed them to the ParserImpl.
    979   class ParserErrorCollector : public io::ErrorCollector {
    980    public:
    981     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
    982         parser_(parser) { }
    983 
    984     virtual ~ParserErrorCollector() { }
    985 
    986     virtual void AddError(int line, int column, const string& message) {
    987       parser_->ReportError(line, column, message);
    988     }
    989 
    990     virtual void AddWarning(int line, int column, const string& message) {
    991       parser_->ReportWarning(line, column, message);
    992     }
    993 
    994    private:
    995     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
    996     TextFormat::Parser::ParserImpl* parser_;
    997   };
    998 
    999   io::ErrorCollector* error_collector_;
   1000   TextFormat::Finder* finder_;
   1001   ParseInfoTree* parse_info_tree_;
   1002   ParserErrorCollector tokenizer_error_collector_;
   1003   io::Tokenizer tokenizer_;
   1004   const Descriptor* root_message_type_;
   1005   SingularOverwritePolicy singular_overwrite_policy_;
   1006   const bool allow_case_insensitive_field_;
   1007   const bool allow_unknown_field_;
   1008   const bool allow_unknown_enum_;
   1009   const bool allow_field_number_;
   1010   bool had_errors_;
   1011 };
   1012 
   1013 #undef DO
   1014 
   1015 // ===========================================================================
   1016 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
   1017 // from the Printer found in //google/protobuf/io/printer.h
   1018 class TextFormat::Printer::TextGenerator {
   1019  public:
   1020   explicit TextGenerator(io::ZeroCopyOutputStream* output,
   1021                          int initial_indent_level)
   1022     : output_(output),
   1023       buffer_(NULL),
   1024       buffer_size_(0),
   1025       at_start_of_line_(true),
   1026       failed_(false),
   1027       indent_(""),
   1028       initial_indent_level_(initial_indent_level) {
   1029     indent_.resize(initial_indent_level_ * 2, ' ');
   1030   }
   1031 
   1032   ~TextGenerator() {
   1033     // Only BackUp() if we're sure we've successfully called Next() at least
   1034     // once.
   1035     if (!failed_ && buffer_size_ > 0) {
   1036       output_->BackUp(buffer_size_);
   1037     }
   1038   }
   1039 
   1040   // Indent text by two spaces.  After calling Indent(), two spaces will be
   1041   // inserted at the beginning of each line of text.  Indent() may be called
   1042   // multiple times to produce deeper indents.
   1043   void Indent() {
   1044     indent_ += "  ";
   1045   }
   1046 
   1047   // Reduces the current indent level by two spaces, or crashes if the indent
   1048   // level is zero.
   1049   void Outdent() {
   1050     if (indent_.empty() ||
   1051         indent_.size() < initial_indent_level_ * 2) {
   1052       GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
   1053       return;
   1054     }
   1055 
   1056     indent_.resize(indent_.size() - 2);
   1057   }
   1058 
   1059   // Print text to the output stream.
   1060   void Print(const string& str) {
   1061     Print(str.data(), str.size());
   1062   }
   1063 
   1064   // Print text to the output stream.
   1065   void Print(const char* text) {
   1066     Print(text, strlen(text));
   1067   }
   1068 
   1069   // Print text to the output stream.
   1070   void Print(const char* text, int size) {
   1071     int pos = 0;  // The number of bytes we've written so far.
   1072 
   1073     for (int i = 0; i < size; i++) {
   1074       if (text[i] == '\n') {
   1075         // Saw newline.  If there is more text, we may need to insert an indent
   1076         // here.  So, write what we have so far, including the '\n'.
   1077         Write(text + pos, i - pos + 1);
   1078         pos = i + 1;
   1079 
   1080         // Setting this true will cause the next Write() to insert an indent
   1081         // first.
   1082         at_start_of_line_ = true;
   1083       }
   1084     }
   1085 
   1086     // Write the rest.
   1087     Write(text + pos, size - pos);
   1088   }
   1089 
   1090   // True if any write to the underlying stream failed.  (We don't just
   1091   // crash in this case because this is an I/O failure, not a programming
   1092   // error.)
   1093   bool failed() const { return failed_; }
   1094 
   1095  private:
   1096   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
   1097 
   1098   void Write(const char* data, int size) {
   1099     if (failed_) return;
   1100     if (size == 0) return;
   1101 
   1102     if (at_start_of_line_) {
   1103       // Insert an indent.
   1104       at_start_of_line_ = false;
   1105       Write(indent_.data(), indent_.size());
   1106       if (failed_) return;
   1107     }
   1108 
   1109     while (size > buffer_size_) {
   1110       // Data exceeds space in the buffer.  Copy what we can and request a
   1111       // new buffer.
   1112       memcpy(buffer_, data, buffer_size_);
   1113       data += buffer_size_;
   1114       size -= buffer_size_;
   1115       void* void_buffer;
   1116       failed_ = !output_->Next(&void_buffer, &buffer_size_);
   1117       if (failed_) return;
   1118       buffer_ = reinterpret_cast<char*>(void_buffer);
   1119     }
   1120 
   1121     // Buffer is big enough to receive the data; copy it.
   1122     memcpy(buffer_, data, size);
   1123     buffer_ += size;
   1124     buffer_size_ -= size;
   1125   }
   1126 
   1127   io::ZeroCopyOutputStream* const output_;
   1128   char* buffer_;
   1129   int buffer_size_;
   1130   bool at_start_of_line_;
   1131   bool failed_;
   1132 
   1133   string indent_;
   1134   int initial_indent_level_;
   1135 };
   1136 
   1137 // ===========================================================================
   1138 
   1139 TextFormat::Finder::~Finder() {
   1140 }
   1141 
   1142 TextFormat::Parser::Parser()
   1143   : error_collector_(NULL),
   1144     finder_(NULL),
   1145     parse_info_tree_(NULL),
   1146     allow_partial_(false),
   1147     allow_case_insensitive_field_(false),
   1148     allow_unknown_field_(false),
   1149     allow_unknown_enum_(false),
   1150     allow_field_number_(false),
   1151     allow_relaxed_whitespace_(false),
   1152     allow_singular_overwrites_(false) {
   1153 }
   1154 
   1155 TextFormat::Parser::~Parser() {}
   1156 
   1157 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
   1158                                Message* output) {
   1159   output->Clear();
   1160 
   1161   ParserImpl::SingularOverwritePolicy overwrites_policy =
   1162       allow_singular_overwrites_
   1163       ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
   1164       : ParserImpl::FORBID_SINGULAR_OVERWRITES;
   1165 
   1166   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
   1167                     finder_, parse_info_tree_,
   1168                     overwrites_policy,
   1169                     allow_case_insensitive_field_, allow_unknown_field_,
   1170                     allow_unknown_enum_, allow_field_number_,
   1171                     allow_relaxed_whitespace_);
   1172   return MergeUsingImpl(input, output, &parser);
   1173 }
   1174 
   1175 bool TextFormat::Parser::ParseFromString(const string& input,
   1176                                          Message* output) {
   1177   io::ArrayInputStream input_stream(input.data(), input.size());
   1178   return Parse(&input_stream, output);
   1179 }
   1180 
   1181 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
   1182                                Message* output) {
   1183   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
   1184                     finder_, parse_info_tree_,
   1185                     ParserImpl::ALLOW_SINGULAR_OVERWRITES,
   1186                     allow_case_insensitive_field_, allow_unknown_field_,
   1187                     allow_unknown_enum_, allow_field_number_,
   1188                     allow_relaxed_whitespace_);
   1189   return MergeUsingImpl(input, output, &parser);
   1190 }
   1191 
   1192 bool TextFormat::Parser::MergeFromString(const string& input,
   1193                                          Message* output) {
   1194   io::ArrayInputStream input_stream(input.data(), input.size());
   1195   return Merge(&input_stream, output);
   1196 }
   1197 
   1198 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
   1199                                         Message* output,
   1200                                         ParserImpl* parser_impl) {
   1201   if (!parser_impl->Parse(output)) return false;
   1202   if (!allow_partial_ && !output->IsInitialized()) {
   1203     vector<string> missing_fields;
   1204     output->FindInitializationErrors(&missing_fields);
   1205     parser_impl->ReportError(-1, 0, "Message missing required fields: " +
   1206                                         Join(missing_fields, ", "));
   1207     return false;
   1208   }
   1209   return true;
   1210 }
   1211 
   1212 bool TextFormat::Parser::ParseFieldValueFromString(
   1213     const string& input,
   1214     const FieldDescriptor* field,
   1215     Message* output) {
   1216   io::ArrayInputStream input_stream(input.data(), input.size());
   1217   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
   1218                     finder_, parse_info_tree_,
   1219                     ParserImpl::ALLOW_SINGULAR_OVERWRITES,
   1220                     allow_case_insensitive_field_, allow_unknown_field_,
   1221                     allow_unknown_enum_, allow_field_number_,
   1222                     allow_relaxed_whitespace_);
   1223   return parser.ParseField(field, output);
   1224 }
   1225 
   1226 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
   1227                                     Message* output) {
   1228   return Parser().Parse(input, output);
   1229 }
   1230 
   1231 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
   1232                                     Message* output) {
   1233   return Parser().Merge(input, output);
   1234 }
   1235 
   1236 /* static */ bool TextFormat::ParseFromString(const string& input,
   1237                                               Message* output) {
   1238   return Parser().ParseFromString(input, output);
   1239 }
   1240 
   1241 /* static */ bool TextFormat::MergeFromString(const string& input,
   1242                                               Message* output) {
   1243   return Parser().MergeFromString(input, output);
   1244 }
   1245 
   1246 // ===========================================================================
   1247 
   1248 // The default implementation for FieldValuePrinter. The base class just
   1249 // does simple formatting. That way, deriving classes could decide to fallback
   1250 // to that behavior.
   1251 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
   1252 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
   1253 string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
   1254   return val ? "true" : "false";
   1255 }
   1256 string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
   1257   return SimpleItoa(val);
   1258 }
   1259 string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
   1260   return SimpleItoa(val);
   1261 }
   1262 string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
   1263   return SimpleItoa(val);
   1264 }
   1265 string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
   1266   return SimpleItoa(val);
   1267 }
   1268 string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
   1269   return SimpleFtoa(val);
   1270 }
   1271 string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
   1272   return SimpleDtoa(val);
   1273 }
   1274 string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
   1275   return StrCat("\"", CEscape(val), "\"");
   1276 }
   1277 string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
   1278   return PrintString(val);
   1279 }
   1280 string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
   1281                                                 const string& name) const {
   1282   return name;
   1283 }
   1284 string TextFormat::FieldValuePrinter::PrintFieldName(
   1285     const Message& message,
   1286     const Reflection* reflection,
   1287     const FieldDescriptor* field) const {
   1288   if (field->is_extension()) {
   1289     // We special-case MessageSet elements for compatibility with proto1.
   1290     if (field->containing_type()->options().message_set_wire_format()
   1291         && field->type() == FieldDescriptor::TYPE_MESSAGE
   1292         && field->is_optional()
   1293         && field->extension_scope() == field->message_type()) {
   1294       return StrCat("[", field->message_type()->full_name(), "]");
   1295     } else {
   1296       return StrCat("[", field->full_name(), "]");
   1297     }
   1298   } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
   1299     // Groups must be serialized with their original capitalization.
   1300     return field->message_type()->name();
   1301   } else {
   1302     return field->name();
   1303   }
   1304 }
   1305 string TextFormat::FieldValuePrinter::PrintMessageStart(
   1306     const Message& message,
   1307     int field_index,
   1308     int field_count,
   1309     bool single_line_mode) const {
   1310   return single_line_mode ? " { " : " {\n";
   1311 }
   1312 string TextFormat::FieldValuePrinter::PrintMessageEnd(
   1313     const Message& message,
   1314     int field_index,
   1315     int field_count,
   1316     bool single_line_mode) const {
   1317   return single_line_mode ? "} " : "}\n";
   1318 }
   1319 
   1320 namespace {
   1321 // Our own specialization: for UTF8 escaped strings.
   1322 class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
   1323  public:
   1324   virtual string PrintString(const string& val) const {
   1325     return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
   1326   }
   1327   virtual string PrintBytes(const string& val) const {
   1328     return TextFormat::FieldValuePrinter::PrintString(val);
   1329   }
   1330 };
   1331 
   1332 }  // namespace
   1333 
   1334 TextFormat::Printer::Printer()
   1335   : initial_indent_level_(0),
   1336     single_line_mode_(false),
   1337     use_field_number_(false),
   1338     use_short_repeated_primitives_(false),
   1339     hide_unknown_fields_(false),
   1340     print_message_fields_in_index_order_(false) {
   1341   SetUseUtf8StringEscaping(false);
   1342 }
   1343 
   1344 TextFormat::Printer::~Printer() {
   1345   STLDeleteValues(&custom_printers_);
   1346 }
   1347 
   1348 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
   1349   SetDefaultFieldValuePrinter(as_utf8
   1350                               ? new FieldValuePrinterUtf8Escaping()
   1351                               : new FieldValuePrinter());
   1352 }
   1353 
   1354 void TextFormat::Printer::SetDefaultFieldValuePrinter(
   1355     const FieldValuePrinter* printer) {
   1356   default_field_value_printer_.reset(printer);
   1357 }
   1358 
   1359 bool TextFormat::Printer::RegisterFieldValuePrinter(
   1360     const FieldDescriptor* field,
   1361     const FieldValuePrinter* printer) {
   1362   return field != NULL
   1363       && printer != NULL
   1364       && custom_printers_.insert(make_pair(field, printer)).second;
   1365 }
   1366 
   1367 bool TextFormat::Printer::PrintToString(const Message& message,
   1368                                         string* output) const {
   1369   GOOGLE_DCHECK(output) << "output specified is NULL";
   1370 
   1371   output->clear();
   1372   io::StringOutputStream output_stream(output);
   1373 
   1374   return Print(message, &output_stream);
   1375 }
   1376 
   1377 bool TextFormat::Printer::PrintUnknownFieldsToString(
   1378     const UnknownFieldSet& unknown_fields,
   1379     string* output) const {
   1380   GOOGLE_DCHECK(output) << "output specified is NULL";
   1381 
   1382   output->clear();
   1383   io::StringOutputStream output_stream(output);
   1384   return PrintUnknownFields(unknown_fields, &output_stream);
   1385 }
   1386 
   1387 bool TextFormat::Printer::Print(const Message& message,
   1388                                 io::ZeroCopyOutputStream* output) const {
   1389   TextGenerator generator(output, initial_indent_level_);
   1390 
   1391   Print(message, generator);
   1392 
   1393   // Output false if the generator failed internally.
   1394   return !generator.failed();
   1395 }
   1396 
   1397 bool TextFormat::Printer::PrintUnknownFields(
   1398     const UnknownFieldSet& unknown_fields,
   1399     io::ZeroCopyOutputStream* output) const {
   1400   TextGenerator generator(output, initial_indent_level_);
   1401 
   1402   PrintUnknownFields(unknown_fields, generator);
   1403 
   1404   // Output false if the generator failed internally.
   1405   return !generator.failed();
   1406 }
   1407 
   1408 namespace {
   1409 // Comparison functor for sorting FieldDescriptors by field index.
   1410 struct FieldIndexSorter {
   1411   bool operator()(const FieldDescriptor* left,
   1412                   const FieldDescriptor* right) const {
   1413     return left->index() < right->index();
   1414   }
   1415 };
   1416 }  // namespace
   1417 
   1418 void TextFormat::Printer::Print(const Message& message,
   1419                                 TextGenerator& generator) const {
   1420   const Reflection* reflection = message.GetReflection();
   1421   vector<const FieldDescriptor*> fields;
   1422   reflection->ListFields(message, &fields);
   1423   if (print_message_fields_in_index_order_) {
   1424     sort(fields.begin(), fields.end(), FieldIndexSorter());
   1425   }
   1426   for (int i = 0; i < fields.size(); i++) {
   1427     PrintField(message, reflection, fields[i], generator);
   1428   }
   1429   if (!hide_unknown_fields_) {
   1430     PrintUnknownFields(reflection->GetUnknownFields(message), generator);
   1431   }
   1432 }
   1433 
   1434 void TextFormat::Printer::PrintFieldValueToString(
   1435     const Message& message,
   1436     const FieldDescriptor* field,
   1437     int index,
   1438     string* output) const {
   1439 
   1440   GOOGLE_DCHECK(output) << "output specified is NULL";
   1441 
   1442   output->clear();
   1443   io::StringOutputStream output_stream(output);
   1444   TextGenerator generator(&output_stream, initial_indent_level_);
   1445 
   1446   PrintFieldValue(message, message.GetReflection(), field, index, generator);
   1447 }
   1448 
   1449 void TextFormat::Printer::PrintField(const Message& message,
   1450                                      const Reflection* reflection,
   1451                                      const FieldDescriptor* field,
   1452                                      TextGenerator& generator) const {
   1453   if (use_short_repeated_primitives_ &&
   1454       field->is_repeated() &&
   1455       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
   1456       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
   1457     PrintShortRepeatedField(message, reflection, field, generator);
   1458     return;
   1459   }
   1460 
   1461   int count = 0;
   1462 
   1463   if (field->is_repeated()) {
   1464     count = reflection->FieldSize(message, field);
   1465   } else if (reflection->HasField(message, field)) {
   1466     count = 1;
   1467   }
   1468 
   1469   for (int j = 0; j < count; ++j) {
   1470     const int field_index = field->is_repeated() ? j : -1;
   1471 
   1472     PrintFieldName(message, reflection, field, generator);
   1473 
   1474     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   1475       const FieldValuePrinter* printer = FindWithDefault(
   1476           custom_printers_, field, default_field_value_printer_.get());
   1477       const Message& sub_message =
   1478               field->is_repeated()
   1479               ? reflection->GetRepeatedMessage(message, field, j)
   1480               : reflection->GetMessage(message, field);
   1481       generator.Print(
   1482           printer->PrintMessageStart(
   1483               sub_message, field_index, count, single_line_mode_));
   1484       generator.Indent();
   1485       Print(sub_message, generator);
   1486       generator.Outdent();
   1487       generator.Print(
   1488           printer->PrintMessageEnd(
   1489               sub_message, field_index, count, single_line_mode_));
   1490     } else {
   1491       generator.Print(": ");
   1492       // Write the field value.
   1493       PrintFieldValue(message, reflection, field, field_index, generator);
   1494       if (single_line_mode_) {
   1495         generator.Print(" ");
   1496       } else {
   1497         generator.Print("\n");
   1498       }
   1499     }
   1500   }
   1501 }
   1502 
   1503 void TextFormat::Printer::PrintShortRepeatedField(
   1504     const Message& message,
   1505     const Reflection* reflection,
   1506     const FieldDescriptor* field,
   1507     TextGenerator& generator) const {
   1508   // Print primitive repeated field in short form.
   1509   PrintFieldName(message, reflection, field, generator);
   1510 
   1511   int size = reflection->FieldSize(message, field);
   1512   generator.Print(": [");
   1513   for (int i = 0; i < size; i++) {
   1514     if (i > 0) generator.Print(", ");
   1515     PrintFieldValue(message, reflection, field, i, generator);
   1516   }
   1517   if (single_line_mode_) {
   1518     generator.Print("] ");
   1519   } else {
   1520     generator.Print("]\n");
   1521   }
   1522 }
   1523 
   1524 void TextFormat::Printer::PrintFieldName(const Message& message,
   1525                                          const Reflection* reflection,
   1526                                          const FieldDescriptor* field,
   1527                                          TextGenerator& generator) const {
   1528   // if use_field_number_ is true, prints field number instead
   1529   // of field name.
   1530   if (use_field_number_) {
   1531     generator.Print(SimpleItoa(field->number()));
   1532     return;
   1533   }
   1534 
   1535   const FieldValuePrinter* printer = FindWithDefault(
   1536       custom_printers_, field, default_field_value_printer_.get());
   1537   generator.Print(printer->PrintFieldName(message, reflection, field));
   1538 }
   1539 
   1540 void TextFormat::Printer::PrintFieldValue(
   1541     const Message& message,
   1542     const Reflection* reflection,
   1543     const FieldDescriptor* field,
   1544     int index,
   1545     TextGenerator& generator) const {
   1546   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
   1547       << "Index must be -1 for non-repeated fields";
   1548 
   1549   const FieldValuePrinter* printer
   1550       = FindWithDefault(custom_printers_, field,
   1551                         default_field_value_printer_.get());
   1552 
   1553   switch (field->cpp_type()) {
   1554 #define OUTPUT_FIELD(CPPTYPE, METHOD)                                   \
   1555     case FieldDescriptor::CPPTYPE_##CPPTYPE:                            \
   1556       generator.Print(printer->Print##METHOD(field->is_repeated()       \
   1557                ? reflection->GetRepeated##METHOD(message, field, index) \
   1558                : reflection->Get##METHOD(message, field)));             \
   1559         break
   1560 
   1561     OUTPUT_FIELD( INT32,  Int32);
   1562     OUTPUT_FIELD( INT64,  Int64);
   1563     OUTPUT_FIELD(UINT32, UInt32);
   1564     OUTPUT_FIELD(UINT64, UInt64);
   1565     OUTPUT_FIELD( FLOAT,  Float);
   1566     OUTPUT_FIELD(DOUBLE, Double);
   1567     OUTPUT_FIELD(  BOOL,   Bool);
   1568 #undef OUTPUT_FIELD
   1569 
   1570     case FieldDescriptor::CPPTYPE_STRING: {
   1571       string scratch;
   1572       const string& value = field->is_repeated()
   1573           ? reflection->GetRepeatedStringReference(
   1574               message, field, index, &scratch)
   1575           : reflection->GetStringReference(message, field, &scratch);
   1576       if (field->type() == FieldDescriptor::TYPE_STRING) {
   1577         generator.Print(printer->PrintString(value));
   1578       } else {
   1579         GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
   1580         generator.Print(printer->PrintBytes(value));
   1581       }
   1582       break;
   1583     }
   1584 
   1585     case FieldDescriptor::CPPTYPE_ENUM: {
   1586       const EnumValueDescriptor *enum_val = field->is_repeated()
   1587           ? reflection->GetRepeatedEnum(message, field, index)
   1588           : reflection->GetEnum(message, field);
   1589       generator.Print(printer->PrintEnum(enum_val->number(), enum_val->name()));
   1590       break;
   1591     }
   1592 
   1593     case FieldDescriptor::CPPTYPE_MESSAGE:
   1594       Print(field->is_repeated()
   1595             ? reflection->GetRepeatedMessage(message, field, index)
   1596             : reflection->GetMessage(message, field),
   1597             generator);
   1598       break;
   1599   }
   1600 }
   1601 
   1602 /* static */ bool TextFormat::Print(const Message& message,
   1603                                     io::ZeroCopyOutputStream* output) {
   1604   return Printer().Print(message, output);
   1605 }
   1606 
   1607 /* static */ bool TextFormat::PrintUnknownFields(
   1608     const UnknownFieldSet& unknown_fields,
   1609     io::ZeroCopyOutputStream* output) {
   1610   return Printer().PrintUnknownFields(unknown_fields, output);
   1611 }
   1612 
   1613 /* static */ bool TextFormat::PrintToString(
   1614     const Message& message, string* output) {
   1615   return Printer().PrintToString(message, output);
   1616 }
   1617 
   1618 /* static */ bool TextFormat::PrintUnknownFieldsToString(
   1619     const UnknownFieldSet& unknown_fields, string* output) {
   1620   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
   1621 }
   1622 
   1623 /* static */ void TextFormat::PrintFieldValueToString(
   1624     const Message& message,
   1625     const FieldDescriptor* field,
   1626     int index,
   1627     string* output) {
   1628   return Printer().PrintFieldValueToString(message, field, index, output);
   1629 }
   1630 
   1631 /* static */ bool TextFormat::ParseFieldValueFromString(
   1632     const string& input,
   1633     const FieldDescriptor* field,
   1634     Message* message) {
   1635   return Parser().ParseFieldValueFromString(input, field, message);
   1636 }
   1637 
   1638 // Prints an integer as hex with a fixed number of digits dependent on the
   1639 // integer type.
   1640 template<typename IntType>
   1641 static string PaddedHex(IntType value) {
   1642   string result;
   1643   result.reserve(sizeof(value) * 2);
   1644   for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
   1645     result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
   1646   }
   1647   return result;
   1648 }
   1649 
   1650 void TextFormat::Printer::PrintUnknownFields(
   1651     const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
   1652   for (int i = 0; i < unknown_fields.field_count(); i++) {
   1653     const UnknownField& field = unknown_fields.field(i);
   1654     string field_number = SimpleItoa(field.number());
   1655 
   1656     switch (field.type()) {
   1657       case UnknownField::TYPE_VARINT:
   1658         generator.Print(field_number);
   1659         generator.Print(": ");
   1660         generator.Print(SimpleItoa(field.varint()));
   1661         if (single_line_mode_) {
   1662           generator.Print(" ");
   1663         } else {
   1664           generator.Print("\n");
   1665         }
   1666         break;
   1667       case UnknownField::TYPE_FIXED32: {
   1668         generator.Print(field_number);
   1669         generator.Print(": 0x");
   1670         char buffer[kFastToBufferSize];
   1671         generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
   1672         if (single_line_mode_) {
   1673           generator.Print(" ");
   1674         } else {
   1675           generator.Print("\n");
   1676         }
   1677         break;
   1678       }
   1679       case UnknownField::TYPE_FIXED64: {
   1680         generator.Print(field_number);
   1681         generator.Print(": 0x");
   1682         char buffer[kFastToBufferSize];
   1683         generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
   1684         if (single_line_mode_) {
   1685           generator.Print(" ");
   1686         } else {
   1687           generator.Print("\n");
   1688         }
   1689         break;
   1690       }
   1691       case UnknownField::TYPE_LENGTH_DELIMITED: {
   1692         generator.Print(field_number);
   1693         const string& value = field.length_delimited();
   1694         UnknownFieldSet embedded_unknown_fields;
   1695         if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
   1696           // This field is parseable as a Message.
   1697           // So it is probably an embedded message.
   1698           if (single_line_mode_) {
   1699             generator.Print(" { ");
   1700           } else {
   1701             generator.Print(" {\n");
   1702             generator.Indent();
   1703           }
   1704           PrintUnknownFields(embedded_unknown_fields, generator);
   1705           if (single_line_mode_) {
   1706             generator.Print("} ");
   1707           } else {
   1708             generator.Outdent();
   1709             generator.Print("}\n");
   1710           }
   1711         } else {
   1712           // This field is not parseable as a Message.
   1713           // So it is probably just a plain string.
   1714           generator.Print(": \"");
   1715           generator.Print(CEscape(value));
   1716           generator.Print("\"");
   1717           if (single_line_mode_) {
   1718             generator.Print(" ");
   1719           } else {
   1720             generator.Print("\n");
   1721           }
   1722         }
   1723         break;
   1724       }
   1725       case UnknownField::TYPE_GROUP:
   1726         generator.Print(field_number);
   1727         if (single_line_mode_) {
   1728           generator.Print(" { ");
   1729         } else {
   1730           generator.Print(" {\n");
   1731           generator.Indent();
   1732         }
   1733         PrintUnknownFields(field.group(), generator);
   1734         if (single_line_mode_) {
   1735           generator.Print("} ");
   1736         } else {
   1737           generator.Outdent();
   1738           generator.Print("}\n");
   1739         }
   1740         break;
   1741     }
   1742   }
   1743 }
   1744 
   1745 }  // namespace protobuf
   1746 }  // namespace google
   1747