Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 
     35 #include <float.h>
     36 #include <math.h>
     37 #include <stdio.h>
     38 #include <stack>
     39 #include <limits>
     40 
     41 #include <google/protobuf/text_format.h>
     42 
     43 #include <google/protobuf/descriptor.h>
     44 #include <google/protobuf/io/coded_stream.h>
     45 #include <google/protobuf/io/zero_copy_stream.h>
     46 #include <google/protobuf/io/zero_copy_stream_impl.h>
     47 #include <google/protobuf/unknown_field_set.h>
     48 #include <google/protobuf/descriptor.pb.h>
     49 #include <google/protobuf/io/tokenizer.h>
     50 #include <google/protobuf/stubs/strutil.h>
     51 
     52 namespace google {
     53 namespace protobuf {
     54 
     55 string Message::DebugString() const {
     56   string debug_string;
     57 
     58   TextFormat::PrintToString(*this, &debug_string);
     59 
     60   return debug_string;
     61 }
     62 
     63 string Message::ShortDebugString() const {
     64   string debug_string;
     65 
     66   TextFormat::Printer printer;
     67   printer.SetSingleLineMode(true);
     68 
     69   printer.PrintToString(*this, &debug_string);
     70   // Single line mode currently might have an extra space at the end.
     71   if (debug_string.size() > 0 &&
     72       debug_string[debug_string.size() - 1] == ' ') {
     73     debug_string.resize(debug_string.size() - 1);
     74   }
     75 
     76   return debug_string;
     77 }
     78 
     79 string Message::Utf8DebugString() const {
     80   string debug_string;
     81 
     82   TextFormat::Printer printer;
     83   printer.SetUseUtf8StringEscaping(true);
     84 
     85   printer.PrintToString(*this, &debug_string);
     86 
     87   return debug_string;
     88 }
     89 
     90 void Message::PrintDebugString() const {
     91   printf("%s", DebugString().c_str());
     92 }
     93 
     94 
     95 // ===========================================================================
     96 // Internal class for parsing an ASCII representation of a Protocol Message.
     97 // This class makes use of the Protocol Message compiler's tokenizer found
     98 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
     99 // method is *not* thread-safe and should only be used in a single thread at
    100 // a time.
    101 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// The macro expands to a complete if/else, so it can only be used inside a
// function returning bool (or something constructible from false).  Because
// the else branch is supplied by the macro itself, a following "else" in the
// caller cannot accidentally bind to the macro's "if".
#define DO(STATEMENT) if (STATEMENT) {} else return false
    106 
    107 class TextFormat::Parser::ParserImpl {
    108  public:
    109 
    110   // Determines if repeated values for a non-repeated field are
    111   // permitted, e.g., the string "foo: 1 foo: 2" for a
    112   // required/optional field named "foo".
    113   enum SingularOverwritePolicy {
    114     ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
    115     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
    116   };
    117 
    118   ParserImpl(const Descriptor* root_message_type,
    119              io::ZeroCopyInputStream* input_stream,
    120              io::ErrorCollector* error_collector,
    121              SingularOverwritePolicy singular_overwrite_policy)
    122     : error_collector_(error_collector),
    123       tokenizer_error_collector_(this),
    124       tokenizer_(input_stream, &tokenizer_error_collector_),
    125       root_message_type_(root_message_type),
    126       singular_overwrite_policy_(singular_overwrite_policy),
    127       had_errors_(false) {
    128     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
    129     // for floats.
    130     tokenizer_.set_allow_f_after_float(true);
    131 
    132     // '#' starts a comment.
    133     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
    134 
    135     // Consume the starting token.
    136     tokenizer_.Next();
    137   }
    138   ~ParserImpl() { }
    139 
    140   // Parses the ASCII representation specified in input and saves the
    141   // information into the output pointer (a Message). Returns
    142   // false if an error occurs (an error will also be logged to
    143   // GOOGLE_LOG(ERROR)).
    144   bool Parse(Message* output) {
    145     // Consume fields until we cannot do so anymore.
    146     while(true) {
    147       if (LookingAtType(io::Tokenizer::TYPE_END)) {
    148         return !had_errors_;
    149       }
    150 
    151       DO(ConsumeField(output));
    152     }
    153   }
    154 
    155   bool ParseField(const FieldDescriptor* field, Message* output) {
    156     bool suc;
    157     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    158       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
    159     } else {
    160       suc = ConsumeFieldValue(output, output->GetReflection(), field);
    161     }
    162     return suc && LookingAtType(io::Tokenizer::TYPE_END);
    163   }
    164 
    165   void ReportError(int line, int col, const string& message) {
    166     had_errors_ = true;
    167     if (error_collector_ == NULL) {
    168       if (line >= 0) {
    169         GOOGLE_LOG(ERROR) << "Error parsing text-format "
    170                    << root_message_type_->full_name()
    171                    << ": " << (line + 1) << ":"
    172                    << (col + 1) << ": " << message;
    173       } else {
    174         GOOGLE_LOG(ERROR) << "Error parsing text-format "
    175                    << root_message_type_->full_name()
    176                    << ": " << message;
    177       }
    178     } else {
    179       error_collector_->AddError(line, col, message);
    180     }
    181   }
    182 
    183   void ReportWarning(int line, int col, const string& message) {
    184     if (error_collector_ == NULL) {
    185       if (line >= 0) {
    186         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
    187                      << root_message_type_->full_name()
    188                      << ": " << (line + 1) << ":"
    189                      << (col + 1) << ": " << message;
    190       } else {
    191         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
    192                      << root_message_type_->full_name()
    193                      << ": " << message;
    194       }
    195     } else {
    196       error_collector_->AddWarning(line, col, message);
    197     }
    198   }
    199 
    200  private:
    201   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
    202 
    203   // Reports an error with the given message with information indicating
    204   // the position (as derived from the current token).
    205   void ReportError(const string& message) {
    206     ReportError(tokenizer_.current().line, tokenizer_.current().column,
    207                 message);
    208   }
    209 
    210   // Reports a warning with the given message with information indicating
    211   // the position (as derived from the current token).
    212   void ReportWarning(const string& message) {
    213     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
    214                   message);
    215   }
    216 
    217   // Consumes the specified message with the given starting delimeter.
    218   // This method checks to see that the end delimeter at the conclusion of
    219   // the consumption matches the starting delimeter passed in here.
    220   bool ConsumeMessage(Message* message, const string delimeter) {
    221     while (!LookingAt(">") &&  !LookingAt("}")) {
    222       DO(ConsumeField(message));
    223     }
    224 
    225     // Confirm that we have a valid ending delimeter.
    226     DO(Consume(delimeter));
    227 
    228     return true;
    229   }
    230 
    231   // Consumes the current field (as returned by the tokenizer) on the
    232   // passed in message.
    233   bool ConsumeField(Message* message) {
    234     const Reflection* reflection = message->GetReflection();
    235     const Descriptor* descriptor = message->GetDescriptor();
    236 
    237     string field_name;
    238 
    239     const FieldDescriptor* field = NULL;
    240 
    241     if (TryConsume("[")) {
    242       // Extension.
    243       DO(ConsumeIdentifier(&field_name));
    244       while (TryConsume(".")) {
    245         string part;
    246         DO(ConsumeIdentifier(&part));
    247         field_name += ".";
    248         field_name += part;
    249       }
    250       DO(Consume("]"));
    251 
    252       field = reflection->FindKnownExtensionByName(field_name);
    253 
    254       if (field == NULL) {
    255         ReportError("Extension \"" + field_name + "\" is not defined or "
    256                     "is not an extension of \"" +
    257                     descriptor->full_name() + "\".");
    258         return false;
    259       }
    260     } else {
    261       DO(ConsumeIdentifier(&field_name));
    262 
    263       field = descriptor->FindFieldByName(field_name);
    264       // Group names are expected to be capitalized as they appear in the
    265       // .proto file, which actually matches their type names, not their field
    266       // names.
    267       if (field == NULL) {
    268         string lower_field_name = field_name;
    269         LowerString(&lower_field_name);
    270         field = descriptor->FindFieldByName(lower_field_name);
    271         // If the case-insensitive match worked but the field is NOT a group,
    272         if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
    273           field = NULL;
    274         }
    275       }
    276       // Again, special-case group names as described above.
    277       if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
    278           && field->message_type()->name() != field_name) {
    279         field = NULL;
    280       }
    281 
    282       if (field == NULL) {
    283         ReportError("Message type \"" + descriptor->full_name() +
    284                     "\" has no field named \"" + field_name + "\".");
    285         return false;
    286       }
    287     }
    288 
    289     // Fail if the field is not repeated and it has already been specified.
    290     if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
    291         !field->is_repeated() && reflection->HasField(*message, field)) {
    292       ReportError("Non-repeated field \"" + field_name +
    293                   "\" is specified multiple times.");
    294       return false;
    295     }
    296 
    297     // Perform special handling for embedded message types.
    298     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    299       // ':' is optional here.
    300       TryConsume(":");
    301       DO(ConsumeFieldMessage(message, reflection, field));
    302     } else {
    303       DO(Consume(":"));
    304       DO(ConsumeFieldValue(message, reflection, field));
    305     }
    306 
    307     if (field->options().deprecated()) {
    308       ReportWarning("text format contains deprecated field \""
    309                     + field_name + "\"");
    310     }
    311 
    312     return true;
    313   }
    314 
    315   bool ConsumeFieldMessage(Message* message,
    316                            const Reflection* reflection,
    317                            const FieldDescriptor* field) {
    318     string delimeter;
    319     if (TryConsume("<")) {
    320       delimeter = ">";
    321     } else {
    322       DO(Consume("{"));
    323       delimeter = "}";
    324     }
    325 
    326     if (field->is_repeated()) {
    327       DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
    328     } else {
    329       DO(ConsumeMessage(reflection->MutableMessage(message, field),
    330                         delimeter));
    331     }
    332     return true;
    333   }
    334 
    335   bool ConsumeFieldValue(Message* message,
    336                          const Reflection* reflection,
    337                          const FieldDescriptor* field) {
    338 
    339 // Define an easy to use macro for setting fields. This macro checks
    340 // to see if the field is repeated (in which case we need to use the Add
    341 // methods or not (in which case we need to use the Set methods).
    342 #define SET_FIELD(CPPTYPE, VALUE)                                  \
    343         if (field->is_repeated()) {                                \
    344           reflection->Add##CPPTYPE(message, field, VALUE);         \
    345         } else {                                                   \
    346           reflection->Set##CPPTYPE(message, field, VALUE);         \
    347         }                                                          \
    348 
    349     switch(field->cpp_type()) {
    350       case FieldDescriptor::CPPTYPE_INT32: {
    351         int64 value;
    352         DO(ConsumeSignedInteger(&value, kint32max));
    353         SET_FIELD(Int32, static_cast<int32>(value));
    354         break;
    355       }
    356 
    357       case FieldDescriptor::CPPTYPE_UINT32: {
    358         uint64 value;
    359         DO(ConsumeUnsignedInteger(&value, kuint32max));
    360         SET_FIELD(UInt32, static_cast<uint32>(value));
    361         break;
    362       }
    363 
    364       case FieldDescriptor::CPPTYPE_INT64: {
    365         int64 value;
    366         DO(ConsumeSignedInteger(&value, kint64max));
    367         SET_FIELD(Int64, value);
    368         break;
    369       }
    370 
    371       case FieldDescriptor::CPPTYPE_UINT64: {
    372         uint64 value;
    373         DO(ConsumeUnsignedInteger(&value, kuint64max));
    374         SET_FIELD(UInt64, value);
    375         break;
    376       }
    377 
    378       case FieldDescriptor::CPPTYPE_FLOAT: {
    379         double value;
    380         DO(ConsumeDouble(&value));
    381         SET_FIELD(Float, static_cast<float>(value));
    382         break;
    383       }
    384 
    385       case FieldDescriptor::CPPTYPE_DOUBLE: {
    386         double value;
    387         DO(ConsumeDouble(&value));
    388         SET_FIELD(Double, value);
    389         break;
    390       }
    391 
    392       case FieldDescriptor::CPPTYPE_STRING: {
    393         string value;
    394         DO(ConsumeString(&value));
    395         SET_FIELD(String, value);
    396         break;
    397       }
    398 
    399       case FieldDescriptor::CPPTYPE_BOOL: {
    400         string value;
    401         DO(ConsumeIdentifier(&value));
    402 
    403         if (value == "true") {
    404           SET_FIELD(Bool, true);
    405         } else if (value == "false") {
    406           SET_FIELD(Bool, false);
    407         } else {
    408           ReportError("Invalid value for boolean field \"" + field->name()
    409                       + "\". Value: \"" + value  + "\".");
    410           return false;
    411         }
    412         break;
    413       }
    414 
    415       case FieldDescriptor::CPPTYPE_ENUM: {
    416         string value;
    417         DO(ConsumeIdentifier(&value));
    418 
    419         // Find the enumeration value.
    420         const EnumDescriptor* enum_type = field->enum_type();
    421         const EnumValueDescriptor* enum_value
    422             = enum_type->FindValueByName(value);
    423 
    424         if (enum_value == NULL) {
    425           ReportError("Unknown enumeration value of \"" + value  + "\" for "
    426                       "field \"" + field->name() + "\".");
    427           return false;
    428         }
    429 
    430         SET_FIELD(Enum, enum_value);
    431         break;
    432       }
    433 
    434       case FieldDescriptor::CPPTYPE_MESSAGE: {
    435         // We should never get here. Put here instead of a default
    436         // so that if new types are added, we get a nice compiler warning.
    437         GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
    438         break;
    439       }
    440     }
    441 #undef SET_FIELD
    442     return true;
    443   }
    444 
    445   // Returns true if the current token's text is equal to that specified.
    446   bool LookingAt(const string& text) {
    447     return tokenizer_.current().text == text;
    448   }
    449 
    450   // Returns true if the current token's type is equal to that specified.
    451   bool LookingAtType(io::Tokenizer::TokenType token_type) {
    452     return tokenizer_.current().type == token_type;
    453   }
    454 
    455   // Consumes an identifier and saves its value in the identifier parameter.
    456   // Returns false if the token is not of type IDENTFIER.
    457   bool ConsumeIdentifier(string* identifier) {
    458     if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    459       ReportError("Expected identifier.");
    460       return false;
    461     }
    462 
    463     *identifier = tokenizer_.current().text;
    464 
    465     tokenizer_.Next();
    466     return true;
    467   }
    468 
    469   // Consumes a string and saves its value in the text parameter.
    470   // Returns false if the token is not of type STRING.
    471   bool ConsumeString(string* text) {
    472     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
    473       ReportError("Expected string.");
    474       return false;
    475     }
    476 
    477     text->clear();
    478     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    479       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
    480 
    481       tokenizer_.Next();
    482     }
    483 
    484     return true;
    485   }
    486 
    487   // Consumes a uint64 and saves its value in the value parameter.
    488   // Returns false if the token is not of type INTEGER.
    489   bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
    490     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    491       ReportError("Expected integer.");
    492       return false;
    493     }
    494 
    495     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
    496                                      max_value, value)) {
    497       ReportError("Integer out of range.");
    498       return false;
    499     }
    500 
    501     tokenizer_.Next();
    502     return true;
    503   }
    504 
    505   // Consumes an int64 and saves its value in the value parameter.
    506   // Note that since the tokenizer does not support negative numbers,
    507   // we actually may consume an additional token (for the minus sign) in this
    508   // method. Returns false if the token is not an integer
    509   // (signed or otherwise).
    510   bool ConsumeSignedInteger(int64* value, uint64 max_value) {
    511     bool negative = false;
    512 
    513     if (TryConsume("-")) {
    514       negative = true;
    515       // Two's complement always allows one more negative integer than
    516       // positive.
    517       ++max_value;
    518     }
    519 
    520     uint64 unsigned_value;
    521 
    522     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
    523 
    524     *value = static_cast<int64>(unsigned_value);
    525 
    526     if (negative) {
    527       *value = -*value;
    528     }
    529 
    530     return true;
    531   }
    532 
    533   // Consumes a double and saves its value in the value parameter.
    534   // Note that since the tokenizer does not support negative numbers,
    535   // we actually may consume an additional token (for the minus sign) in this
    536   // method. Returns false if the token is not a double
    537   // (signed or otherwise).
    538   bool ConsumeDouble(double* value) {
    539     bool negative = false;
    540 
    541     if (TryConsume("-")) {
    542       negative = true;
    543     }
    544 
    545     // A double can actually be an integer, according to the tokenizer.
    546     // Therefore, we must check both cases here.
    547     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    548       // We have found an integer value for the double.
    549       uint64 integer_value;
    550       DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
    551 
    552       *value = static_cast<double>(integer_value);
    553     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
    554       // We have found a float value for the double.
    555       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
    556 
    557       // Mark the current token as consumed.
    558       tokenizer_.Next();
    559     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    560       string text = tokenizer_.current().text;
    561       LowerString(&text);
    562       if (text == "inf" || text == "infinity") {
    563         *value = std::numeric_limits<double>::infinity();
    564         tokenizer_.Next();
    565       } else if (text == "nan") {
    566         *value = std::numeric_limits<double>::quiet_NaN();
    567         tokenizer_.Next();
    568       } else {
    569         ReportError("Expected double.");
    570         return false;
    571       }
    572     } else {
    573       ReportError("Expected double.");
    574       return false;
    575     }
    576 
    577     if (negative) {
    578       *value = -*value;
    579     }
    580 
    581     return true;
    582   }
    583 
    584   // Consumes a token and confirms that it matches that specified in the
    585   // value parameter. Returns false if the token found does not match that
    586   // which was specified.
    587   bool Consume(const string& value) {
    588     const string& current_value = tokenizer_.current().text;
    589 
    590     if (current_value != value) {
    591       ReportError("Expected \"" + value + "\", found \"" + current_value
    592                   + "\".");
    593       return false;
    594     }
    595 
    596     tokenizer_.Next();
    597 
    598     return true;
    599   }
    600 
    601   // Attempts to consume the supplied value. Returns false if a the
    602   // token found does not match the value specified.
    603   bool TryConsume(const string& value) {
    604     if (tokenizer_.current().text == value) {
    605       tokenizer_.Next();
    606       return true;
    607     } else {
    608       return false;
    609     }
    610   }
    611 
    612   // An internal instance of the Tokenizer's error collector, used to
    613   // collect any base-level parse errors and feed them to the ParserImpl.
    614   class ParserErrorCollector : public io::ErrorCollector {
    615    public:
    616     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
    617         parser_(parser) { }
    618 
    619     virtual ~ParserErrorCollector() { };
    620 
    621     virtual void AddError(int line, int column, const string& message) {
    622       parser_->ReportError(line, column, message);
    623     }
    624 
    625     virtual void AddWarning(int line, int column, const string& message) {
    626       parser_->ReportWarning(line, column, message);
    627     }
    628 
    629    private:
    630     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
    631     TextFormat::Parser::ParserImpl* parser_;
    632   };
    633 
    634   io::ErrorCollector* error_collector_;
    635   ParserErrorCollector tokenizer_error_collector_;
    636   io::Tokenizer tokenizer_;
    637   const Descriptor* root_message_type_;
    638   SingularOverwritePolicy singular_overwrite_policy_;
    639   bool had_errors_;
    640 };
    641 
    642 #undef DO
    643 
    644 // ===========================================================================
    645 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
    646 // from the Printer found in //google/protobuf/io/printer.h
    647 class TextFormat::Printer::TextGenerator {
    648  public:
    649   explicit TextGenerator(io::ZeroCopyOutputStream* output,
    650                          int initial_indent_level)
    651     : output_(output),
    652       buffer_(NULL),
    653       buffer_size_(0),
    654       at_start_of_line_(true),
    655       failed_(false),
    656       indent_(""),
    657       initial_indent_level_(initial_indent_level) {
    658     indent_.resize(initial_indent_level_ * 2, ' ');
    659   }
    660 
    661   ~TextGenerator() {
    662     // Only BackUp() if we're sure we've successfully called Next() at least
    663     // once.
    664     if (buffer_size_ > 0) {
    665       output_->BackUp(buffer_size_);
    666     }
    667   }
    668 
    669   // Indent text by two spaces.  After calling Indent(), two spaces will be
    670   // inserted at the beginning of each line of text.  Indent() may be called
    671   // multiple times to produce deeper indents.
    672   void Indent() {
    673     indent_ += "  ";
    674   }
    675 
    676   // Reduces the current indent level by two spaces, or crashes if the indent
    677   // level is zero.
    678   void Outdent() {
    679     if (indent_.empty() ||
    680         indent_.size() < initial_indent_level_ * 2) {
    681       GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
    682       return;
    683     }
    684 
    685     indent_.resize(indent_.size() - 2);
    686   }
    687 
    688   // Print text to the output stream.
    689   void Print(const string& str) {
    690     Print(str.data(), str.size());
    691   }
    692 
    693   // Print text to the output stream.
    694   void Print(const char* text) {
    695     Print(text, strlen(text));
    696   }
    697 
    698   // Print text to the output stream.
    699   void Print(const char* text, int size) {
    700     int pos = 0;  // The number of bytes we've written so far.
    701 
    702     for (int i = 0; i < size; i++) {
    703       if (text[i] == '\n') {
    704         // Saw newline.  If there is more text, we may need to insert an indent
    705         // here.  So, write what we have so far, including the '\n'.
    706         Write(text + pos, i - pos + 1);
    707         pos = i + 1;
    708 
    709         // Setting this true will cause the next Write() to insert an indent
    710         // first.
    711         at_start_of_line_ = true;
    712       }
    713     }
    714 
    715     // Write the rest.
    716     Write(text + pos, size - pos);
    717   }
    718 
    719   // True if any write to the underlying stream failed.  (We don't just
    720   // crash in this case because this is an I/O failure, not a programming
    721   // error.)
    722   bool failed() const { return failed_; }
    723 
    724  private:
    725   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
    726 
    727   void Write(const char* data, int size) {
    728     if (failed_) return;
    729     if (size == 0) return;
    730 
    731     if (at_start_of_line_) {
    732       // Insert an indent.
    733       at_start_of_line_ = false;
    734       Write(indent_.data(), indent_.size());
    735       if (failed_) return;
    736     }
    737 
    738     while (size > buffer_size_) {
    739       // Data exceeds space in the buffer.  Copy what we can and request a
    740       // new buffer.
    741       memcpy(buffer_, data, buffer_size_);
    742       data += buffer_size_;
    743       size -= buffer_size_;
    744       void* void_buffer;
    745       failed_ = !output_->Next(&void_buffer, &buffer_size_);
    746       if (failed_) return;
    747       buffer_ = reinterpret_cast<char*>(void_buffer);
    748     }
    749 
    750     // Buffer is big enough to receive the data; copy it.
    751     memcpy(buffer_, data, size);
    752     buffer_ += size;
    753     buffer_size_ -= size;
    754   }
    755 
    756   io::ZeroCopyOutputStream* const output_;
    757   char* buffer_;
    758   int buffer_size_;
    759   bool at_start_of_line_;
    760   bool failed_;
    761 
    762   string indent_;
    763   int initial_indent_level_;
    764 };
    765 
    766 // ===========================================================================
    767 
    768 TextFormat::Parser::Parser()
    769   : error_collector_(NULL),
    770     allow_partial_(false) {}
    771 
    772 TextFormat::Parser::~Parser() {}
    773 
    774 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
    775                                Message* output) {
    776   output->Clear();
    777   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
    778                     ParserImpl::FORBID_SINGULAR_OVERWRITES);
    779   return MergeUsingImpl(input, output, &parser);
    780 }
    781 
    782 bool TextFormat::Parser::ParseFromString(const string& input,
    783                                          Message* output) {
    784   io::ArrayInputStream input_stream(input.data(), input.size());
    785   return Parse(&input_stream, output);
    786 }
    787 
    788 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
    789                                Message* output) {
    790   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
    791                     ParserImpl::ALLOW_SINGULAR_OVERWRITES);
    792   return MergeUsingImpl(input, output, &parser);
    793 }
    794 
    795 bool TextFormat::Parser::MergeFromString(const string& input,
    796                                          Message* output) {
    797   io::ArrayInputStream input_stream(input.data(), input.size());
    798   return Merge(&input_stream, output);
    799 }
    800 
    801 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
    802                                         Message* output,
    803                                         ParserImpl* parser_impl) {
    804   if (!parser_impl->Parse(output)) return false;
    805   if (!allow_partial_ && !output->IsInitialized()) {
    806     vector<string> missing_fields;
    807     output->FindInitializationErrors(&missing_fields);
    808     parser_impl->ReportError(-1, 0, "Message missing required fields: " +
    809                                     JoinStrings(missing_fields, ", "));
    810     return false;
    811   }
    812   return true;
    813 }
    814 
    815 bool TextFormat::Parser::ParseFieldValueFromString(
    816     const string& input,
    817     const FieldDescriptor* field,
    818     Message* output) {
    819   io::ArrayInputStream input_stream(input.data(), input.size());
    820   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
    821                     ParserImpl::ALLOW_SINGULAR_OVERWRITES);
    822   return parser.ParseField(field, output);
    823 }
    824 
    825 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
    826                                     Message* output) {
    827   return Parser().Parse(input, output);
    828 }
    829 
    830 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
    831                                     Message* output) {
    832   return Parser().Merge(input, output);
    833 }
    834 
    835 /* static */ bool TextFormat::ParseFromString(const string& input,
    836                                               Message* output) {
    837   return Parser().ParseFromString(input, output);
    838 }
    839 
    840 /* static */ bool TextFormat::MergeFromString(const string& input,
    841                                               Message* output) {
    842   return Parser().MergeFromString(input, output);
    843 }
    844 
    845 // ===========================================================================
    846 
    847 TextFormat::Printer::Printer()
    848   : initial_indent_level_(0),
    849     single_line_mode_(false),
    850     use_short_repeated_primitives_(false),
    851     utf8_string_escaping_(false) {}
    852 
    853 TextFormat::Printer::~Printer() {}
    854 
    855 bool TextFormat::Printer::PrintToString(const Message& message,
    856                                         string* output) {
    857   GOOGLE_DCHECK(output) << "output specified is NULL";
    858 
    859   output->clear();
    860   io::StringOutputStream output_stream(output);
    861 
    862   bool result = Print(message, &output_stream);
    863 
    864   return result;
    865 }
    866 
    867 bool TextFormat::Printer::PrintUnknownFieldsToString(
    868     const UnknownFieldSet& unknown_fields,
    869     string* output) {
    870   GOOGLE_DCHECK(output) << "output specified is NULL";
    871 
    872   output->clear();
    873   io::StringOutputStream output_stream(output);
    874   return PrintUnknownFields(unknown_fields, &output_stream);
    875 }
    876 
    877 bool TextFormat::Printer::Print(const Message& message,
    878                                 io::ZeroCopyOutputStream* output) {
    879   TextGenerator generator(output, initial_indent_level_);
    880 
    881   Print(message, generator);
    882 
    883   // Output false if the generator failed internally.
    884   return !generator.failed();
    885 }
    886 
    887 bool TextFormat::Printer::PrintUnknownFields(
    888     const UnknownFieldSet& unknown_fields,
    889     io::ZeroCopyOutputStream* output) {
    890   TextGenerator generator(output, initial_indent_level_);
    891 
    892   PrintUnknownFields(unknown_fields, generator);
    893 
    894   // Output false if the generator failed internally.
    895   return !generator.failed();
    896 }
    897 
    898 void TextFormat::Printer::Print(const Message& message,
    899                                 TextGenerator& generator) {
    900   const Reflection* reflection = message.GetReflection();
    901   vector<const FieldDescriptor*> fields;
    902   reflection->ListFields(message, &fields);
    903   for (int i = 0; i < fields.size(); i++) {
    904     PrintField(message, reflection, fields[i], generator);
    905   }
    906   PrintUnknownFields(reflection->GetUnknownFields(message), generator);
    907 }
    908 
    909 void TextFormat::Printer::PrintFieldValueToString(
    910     const Message& message,
    911     const FieldDescriptor* field,
    912     int index,
    913     string* output) {
    914 
    915   GOOGLE_DCHECK(output) << "output specified is NULL";
    916 
    917   output->clear();
    918   io::StringOutputStream output_stream(output);
    919   TextGenerator generator(&output_stream, initial_indent_level_);
    920 
    921   PrintFieldValue(message, message.GetReflection(), field, index, generator);
    922 }
    923 
    924 void TextFormat::Printer::PrintField(const Message& message,
    925                                      const Reflection* reflection,
    926                                      const FieldDescriptor* field,
    927                                      TextGenerator& generator) {
    928   if (use_short_repeated_primitives_ &&
    929       field->is_repeated() &&
    930       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
    931       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
    932     PrintShortRepeatedField(message, reflection, field, generator);
    933     return;
    934   }
    935 
    936   int count = 0;
    937 
    938   if (field->is_repeated()) {
    939     count = reflection->FieldSize(message, field);
    940   } else if (reflection->HasField(message, field)) {
    941     count = 1;
    942   }
    943 
    944   for (int j = 0; j < count; ++j) {
    945     PrintFieldName(message, reflection, field, generator);
    946 
    947     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    948       if (single_line_mode_) {
    949         generator.Print(" { ");
    950       } else {
    951         generator.Print(" {\n");
    952         generator.Indent();
    953       }
    954     } else {
    955       generator.Print(": ");
    956     }
    957 
    958     // Write the field value.
    959     int field_index = j;
    960     if (!field->is_repeated()) {
    961       field_index = -1;
    962     }
    963 
    964     PrintFieldValue(message, reflection, field, field_index, generator);
    965 
    966     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    967       if (single_line_mode_) {
    968         generator.Print("} ");
    969       } else {
    970         generator.Outdent();
    971         generator.Print("}\n");
    972       }
    973     } else {
    974       if (single_line_mode_) {
    975         generator.Print(" ");
    976       } else {
    977         generator.Print("\n");
    978       }
    979     }
    980   }
    981 }
    982 
    983 void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
    984                                                   const Reflection* reflection,
    985                                                   const FieldDescriptor* field,
    986                                                   TextGenerator& generator) {
    987   // Print primitive repeated field in short form.
    988   PrintFieldName(message, reflection, field, generator);
    989 
    990   int size = reflection->FieldSize(message, field);
    991   generator.Print(": [");
    992   for (int i = 0; i < size; i++) {
    993     if (i > 0) generator.Print(", ");
    994     PrintFieldValue(message, reflection, field, i, generator);
    995   }
    996   if (single_line_mode_) {
    997     generator.Print("] ");
    998   } else {
    999     generator.Print("]\n");
   1000   }
   1001 }
   1002 
   1003 void TextFormat::Printer::PrintFieldName(const Message& message,
   1004                                          const Reflection* reflection,
   1005                                          const FieldDescriptor* field,
   1006                                          TextGenerator& generator) {
   1007   if (field->is_extension()) {
   1008     generator.Print("[");
   1009     // We special-case MessageSet elements for compatibility with proto1.
   1010     if (field->containing_type()->options().message_set_wire_format()
   1011         && field->type() == FieldDescriptor::TYPE_MESSAGE
   1012         && field->is_optional()
   1013         && field->extension_scope() == field->message_type()) {
   1014       generator.Print(field->message_type()->full_name());
   1015     } else {
   1016       generator.Print(field->full_name());
   1017     }
   1018     generator.Print("]");
   1019   } else {
   1020     if (field->type() == FieldDescriptor::TYPE_GROUP) {
   1021       // Groups must be serialized with their original capitalization.
   1022       generator.Print(field->message_type()->name());
   1023     } else {
   1024       generator.Print(field->name());
   1025     }
   1026   }
   1027 }
   1028 
   1029 void TextFormat::Printer::PrintFieldValue(
   1030     const Message& message,
   1031     const Reflection* reflection,
   1032     const FieldDescriptor* field,
   1033     int index,
   1034     TextGenerator& generator) {
   1035   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
   1036       << "Index must be -1 for non-repeated fields";
   1037 
   1038   switch (field->cpp_type()) {
   1039 #define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING)                             \
   1040       case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
   1041         generator.Print(TO_STRING(field->is_repeated() ?                     \
   1042           reflection->GetRepeated##METHOD(message, field, index) :           \
   1043           reflection->Get##METHOD(message, field)));                         \
   1044         break;                                                               \
   1045 
   1046       OUTPUT_FIELD( INT32,  Int32, SimpleItoa);
   1047       OUTPUT_FIELD( INT64,  Int64, SimpleItoa);
   1048       OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
   1049       OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
   1050       OUTPUT_FIELD( FLOAT,  Float, SimpleFtoa);
   1051       OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
   1052 #undef OUTPUT_FIELD
   1053 
   1054       case FieldDescriptor::CPPTYPE_STRING: {
   1055         string scratch;
   1056         const string& value = field->is_repeated() ?
   1057             reflection->GetRepeatedStringReference(
   1058               message, field, index, &scratch) :
   1059             reflection->GetStringReference(message, field, &scratch);
   1060 
   1061         generator.Print("\"");
   1062         if (utf8_string_escaping_) {
   1063           generator.Print(strings::Utf8SafeCEscape(value));
   1064         } else {
   1065           generator.Print(CEscape(value));
   1066         }
   1067         generator.Print("\"");
   1068 
   1069         break;
   1070       }
   1071 
   1072       case FieldDescriptor::CPPTYPE_BOOL:
   1073         if (field->is_repeated()) {
   1074           generator.Print(reflection->GetRepeatedBool(message, field, index)
   1075                           ? "true" : "false");
   1076         } else {
   1077           generator.Print(reflection->GetBool(message, field)
   1078                           ? "true" : "false");
   1079         }
   1080         break;
   1081 
   1082       case FieldDescriptor::CPPTYPE_ENUM:
   1083         generator.Print(field->is_repeated() ?
   1084           reflection->GetRepeatedEnum(message, field, index)->name() :
   1085           reflection->GetEnum(message, field)->name());
   1086         break;
   1087 
   1088       case FieldDescriptor::CPPTYPE_MESSAGE:
   1089         Print(field->is_repeated() ?
   1090                 reflection->GetRepeatedMessage(message, field, index) :
   1091                 reflection->GetMessage(message, field),
   1092               generator);
   1093         break;
   1094   }
   1095 }
   1096 
   1097 /* static */ bool TextFormat::Print(const Message& message,
   1098                                     io::ZeroCopyOutputStream* output) {
   1099   return Printer().Print(message, output);
   1100 }
   1101 
   1102 /* static */ bool TextFormat::PrintUnknownFields(
   1103     const UnknownFieldSet& unknown_fields,
   1104     io::ZeroCopyOutputStream* output) {
   1105   return Printer().PrintUnknownFields(unknown_fields, output);
   1106 }
   1107 
   1108 /* static */ bool TextFormat::PrintToString(
   1109     const Message& message, string* output) {
   1110   return Printer().PrintToString(message, output);
   1111 }
   1112 
   1113 /* static */ bool TextFormat::PrintUnknownFieldsToString(
   1114     const UnknownFieldSet& unknown_fields, string* output) {
   1115   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
   1116 }
   1117 
   1118 /* static */ void TextFormat::PrintFieldValueToString(
   1119     const Message& message,
   1120     const FieldDescriptor* field,
   1121     int index,
   1122     string* output) {
   1123   return Printer().PrintFieldValueToString(message, field, index, output);
   1124 }
   1125 
   1126 /* static */ bool TextFormat::ParseFieldValueFromString(
   1127     const string& input,
   1128     const FieldDescriptor* field,
   1129     Message* message) {
   1130   return Parser().ParseFieldValueFromString(input, field, message);
   1131 }
   1132 
   1133 // Prints an integer as hex with a fixed number of digits dependent on the
   1134 // integer type.
   1135 template<typename IntType>
   1136 static string PaddedHex(IntType value) {
   1137   string result;
   1138   result.reserve(sizeof(value) * 2);
   1139   for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
   1140     result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
   1141   }
   1142   return result;
   1143 }
   1144 
   1145 void TextFormat::Printer::PrintUnknownFields(
   1146     const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
   1147   for (int i = 0; i < unknown_fields.field_count(); i++) {
   1148     const UnknownField& field = unknown_fields.field(i);
   1149     string field_number = SimpleItoa(field.number());
   1150 
   1151     switch (field.type()) {
   1152       case UnknownField::TYPE_VARINT:
   1153         generator.Print(field_number);
   1154         generator.Print(": ");
   1155         generator.Print(SimpleItoa(field.varint()));
   1156         if (single_line_mode_) {
   1157           generator.Print(" ");
   1158         } else {
   1159           generator.Print("\n");
   1160         }
   1161         break;
   1162       case UnknownField::TYPE_FIXED32: {
   1163         generator.Print(field_number);
   1164         generator.Print(": 0x");
   1165         char buffer[kFastToBufferSize];
   1166         generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
   1167         if (single_line_mode_) {
   1168           generator.Print(" ");
   1169         } else {
   1170           generator.Print("\n");
   1171         }
   1172         break;
   1173       }
   1174       case UnknownField::TYPE_FIXED64: {
   1175         generator.Print(field_number);
   1176         generator.Print(": 0x");
   1177         char buffer[kFastToBufferSize];
   1178         generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
   1179         if (single_line_mode_) {
   1180           generator.Print(" ");
   1181         } else {
   1182           generator.Print("\n");
   1183         }
   1184         break;
   1185       }
   1186       case UnknownField::TYPE_LENGTH_DELIMITED: {
   1187         generator.Print(field_number);
   1188         const string& value = field.length_delimited();
   1189         UnknownFieldSet embedded_unknown_fields;
   1190         if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
   1191           // This field is parseable as a Message.
   1192           // So it is probably an embedded message.
   1193           if (single_line_mode_) {
   1194             generator.Print(" { ");
   1195           } else {
   1196             generator.Print(" {\n");
   1197             generator.Indent();
   1198           }
   1199           PrintUnknownFields(embedded_unknown_fields, generator);
   1200           if (single_line_mode_) {
   1201             generator.Print("} ");
   1202           } else {
   1203             generator.Outdent();
   1204             generator.Print("}\n");
   1205           }
   1206         } else {
   1207           // This field is not parseable as a Message.
   1208           // So it is probably just a plain string.
   1209           generator.Print(": \"");
   1210           generator.Print(CEscape(value));
   1211           generator.Print("\"");
   1212           if (single_line_mode_) {
   1213             generator.Print(" ");
   1214           } else {
   1215             generator.Print("\n");
   1216           }
   1217         }
   1218         break;
   1219       }
   1220       case UnknownField::TYPE_GROUP:
   1221         generator.Print(field_number);
   1222         if (single_line_mode_) {
   1223           generator.Print(" { ");
   1224         } else {
   1225           generator.Print(" {\n");
   1226           generator.Indent();
   1227         }
   1228         PrintUnknownFields(field.group(), generator);
   1229         if (single_line_mode_) {
   1230           generator.Print("} ");
   1231         } else {
   1232           generator.Outdent();
   1233           generator.Print("}\n");
   1234         }
   1235         break;
   1236     }
   1237   }
   1238 }
   1239 
   1240 }  // namespace protobuf
   1241 }  // namespace google
   1242