1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr (at) google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 35 #include <float.h> 36 #include <math.h> 37 #include <stdio.h> 38 #include <stack> 39 #include <limits> 40 41 #include <google/protobuf/text_format.h> 42 43 #include <google/protobuf/descriptor.h> 44 #include <google/protobuf/io/coded_stream.h> 45 #include <google/protobuf/io/zero_copy_stream.h> 46 #include <google/protobuf/io/zero_copy_stream_impl.h> 47 #include <google/protobuf/unknown_field_set.h> 48 #include <google/protobuf/descriptor.pb.h> 49 #include <google/protobuf/io/tokenizer.h> 50 #include <google/protobuf/stubs/strutil.h> 51 52 namespace google { 53 namespace protobuf { 54 55 string Message::DebugString() const { 56 string debug_string; 57 58 TextFormat::PrintToString(*this, &debug_string); 59 60 return debug_string; 61 } 62 63 string Message::ShortDebugString() const { 64 string debug_string; 65 66 TextFormat::Printer printer; 67 printer.SetSingleLineMode(true); 68 69 printer.PrintToString(*this, &debug_string); 70 // Single line mode currently might have an extra space at the end. 71 if (debug_string.size() > 0 && 72 debug_string[debug_string.size() - 1] == ' ') { 73 debug_string.resize(debug_string.size() - 1); 74 } 75 76 return debug_string; 77 } 78 79 string Message::Utf8DebugString() const { 80 string debug_string; 81 82 TextFormat::Printer printer; 83 printer.SetUseUtf8StringEscaping(true); 84 85 printer.PrintToString(*this, &debug_string); 86 87 return debug_string; 88 } 89 90 void Message::PrintDebugString() const { 91 printf("%s", DebugString().c_str()); 92 } 93 94 95 // =========================================================================== 96 // Internal class for parsing an ASCII representation of a Protocol Message. 97 // This class makes use of the Protocol Message compiler's tokenizer found 98 // in //google/protobuf/io/tokenizer.h. Note that class's Parse 99 // method is *not* thread-safe and should only be used in a single thread at 100 // a time. 101 102 // Makes code slightly more readable. The meaning of "DO(foo)" is 103 // "Execute foo and fail if it fails.", where failure is indicated by 104 // returning false. Borrowed from parser.cc (Thanks Kenton!). 105 #define DO(STATEMENT) if (STATEMENT) {} else return false 106 107 class TextFormat::Parser::ParserImpl { 108 public: 109 110 // Determines if repeated values for a non-repeated field are 111 // permitted, e.g., the string "foo: 1 foo: 2" for a 112 // required/optional field named "foo". 113 enum SingularOverwritePolicy { 114 ALLOW_SINGULAR_OVERWRITES = 0, // the last value is retained 115 FORBID_SINGULAR_OVERWRITES = 1, // an error is issued 116 }; 117 118 ParserImpl(const Descriptor* root_message_type, 119 io::ZeroCopyInputStream* input_stream, 120 io::ErrorCollector* error_collector, 121 SingularOverwritePolicy singular_overwrite_policy) 122 : error_collector_(error_collector), 123 tokenizer_error_collector_(this), 124 tokenizer_(input_stream, &tokenizer_error_collector_), 125 root_message_type_(root_message_type), 126 singular_overwrite_policy_(singular_overwrite_policy), 127 had_errors_(false) { 128 // For backwards-compatibility with proto1, we need to allow the 'f' suffix 129 // for floats. 130 tokenizer_.set_allow_f_after_float(true); 131 132 // '#' starts a comment. 133 tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE); 134 135 // Consume the starting token. 136 tokenizer_.Next(); 137 } 138 ~ParserImpl() { } 139 140 // Parses the ASCII representation specified in input and saves the 141 // information into the output pointer (a Message). Returns 142 // false if an error occurs (an error will also be logged to 143 // GOOGLE_LOG(ERROR)). 144 bool Parse(Message* output) { 145 // Consume fields until we cannot do so anymore. 146 while(true) { 147 if (LookingAtType(io::Tokenizer::TYPE_END)) { 148 return !had_errors_; 149 } 150 151 DO(ConsumeField(output)); 152 } 153 } 154 155 bool ParseField(const FieldDescriptor* field, Message* output) { 156 bool suc; 157 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 158 suc = ConsumeFieldMessage(output, output->GetReflection(), field); 159 } else { 160 suc = ConsumeFieldValue(output, output->GetReflection(), field); 161 } 162 return suc && LookingAtType(io::Tokenizer::TYPE_END); 163 } 164 165 void ReportError(int line, int col, const string& message) { 166 had_errors_ = true; 167 if (error_collector_ == NULL) { 168 if (line >= 0) { 169 GOOGLE_LOG(ERROR) << "Error parsing text-format " 170 << root_message_type_->full_name() 171 << ": " << (line + 1) << ":" 172 << (col + 1) << ": " << message; 173 } else { 174 GOOGLE_LOG(ERROR) << "Error parsing text-format " 175 << root_message_type_->full_name() 176 << ": " << message; 177 } 178 } else { 179 error_collector_->AddError(line, col, message); 180 } 181 } 182 183 void ReportWarning(int line, int col, const string& message) { 184 if (error_collector_ == NULL) { 185 if (line >= 0) { 186 GOOGLE_LOG(WARNING) << "Warning parsing text-format " 187 << root_message_type_->full_name() 188 << ": " << (line + 1) << ":" 189 << (col + 1) << ": " << message; 190 } else { 191 GOOGLE_LOG(WARNING) << "Warning parsing text-format " 192 << root_message_type_->full_name() 193 << ": " << message; 194 } 195 } else { 196 error_collector_->AddWarning(line, col, message); 197 } 198 } 199 200 private: 201 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl); 202 203 // Reports an error with the given message with information indicating 204 // the position (as derived from the current token). 205 void ReportError(const string& message) { 206 ReportError(tokenizer_.current().line, tokenizer_.current().column, 207 message); 208 } 209 210 // Reports a warning with the given message with information indicating 211 // the position (as derived from the current token). 212 void ReportWarning(const string& message) { 213 ReportWarning(tokenizer_.current().line, tokenizer_.current().column, 214 message); 215 } 216 217 // Consumes the specified message with the given starting delimeter. 218 // This method checks to see that the end delimeter at the conclusion of 219 // the consumption matches the starting delimeter passed in here. 220 bool ConsumeMessage(Message* message, const string delimeter) { 221 while (!LookingAt(">") && !LookingAt("}")) { 222 DO(ConsumeField(message)); 223 } 224 225 // Confirm that we have a valid ending delimeter. 226 DO(Consume(delimeter)); 227 228 return true; 229 } 230 231 // Consumes the current field (as returned by the tokenizer) on the 232 // passed in message. 233 bool ConsumeField(Message* message) { 234 const Reflection* reflection = message->GetReflection(); 235 const Descriptor* descriptor = message->GetDescriptor(); 236 237 string field_name; 238 239 const FieldDescriptor* field = NULL; 240 241 if (TryConsume("[")) { 242 // Extension. 243 DO(ConsumeIdentifier(&field_name)); 244 while (TryConsume(".")) { 245 string part; 246 DO(ConsumeIdentifier(&part)); 247 field_name += "."; 248 field_name += part; 249 } 250 DO(Consume("]")); 251 252 field = reflection->FindKnownExtensionByName(field_name); 253 254 if (field == NULL) { 255 ReportError("Extension \"" + field_name + "\" is not defined or " 256 "is not an extension of \"" + 257 descriptor->full_name() + "\"."); 258 return false; 259 } 260 } else { 261 DO(ConsumeIdentifier(&field_name)); 262 263 field = descriptor->FindFieldByName(field_name); 264 // Group names are expected to be capitalized as they appear in the 265 // .proto file, which actually matches their type names, not their field 266 // names. 267 if (field == NULL) { 268 string lower_field_name = field_name; 269 LowerString(&lower_field_name); 270 field = descriptor->FindFieldByName(lower_field_name); 271 // If the case-insensitive match worked but the field is NOT a group, 272 if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) { 273 field = NULL; 274 } 275 } 276 // Again, special-case group names as described above. 277 if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP 278 && field->message_type()->name() != field_name) { 279 field = NULL; 280 } 281 282 if (field == NULL) { 283 ReportError("Message type \"" + descriptor->full_name() + 284 "\" has no field named \"" + field_name + "\"."); 285 return false; 286 } 287 } 288 289 // Fail if the field is not repeated and it has already been specified. 290 if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) && 291 !field->is_repeated() && reflection->HasField(*message, field)) { 292 ReportError("Non-repeated field \"" + field_name + 293 "\" is specified multiple times."); 294 return false; 295 } 296 297 // Perform special handling for embedded message types. 298 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 299 // ':' is optional here. 300 TryConsume(":"); 301 DO(ConsumeFieldMessage(message, reflection, field)); 302 } else { 303 DO(Consume(":")); 304 DO(ConsumeFieldValue(message, reflection, field)); 305 } 306 307 if (field->options().deprecated()) { 308 ReportWarning("text format contains deprecated field \"" 309 + field_name + "\""); 310 } 311 312 return true; 313 } 314 315 bool ConsumeFieldMessage(Message* message, 316 const Reflection* reflection, 317 const FieldDescriptor* field) { 318 string delimeter; 319 if (TryConsume("<")) { 320 delimeter = ">"; 321 } else { 322 DO(Consume("{")); 323 delimeter = "}"; 324 } 325 326 if (field->is_repeated()) { 327 DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter)); 328 } else { 329 DO(ConsumeMessage(reflection->MutableMessage(message, field), 330 delimeter)); 331 } 332 return true; 333 } 334 335 bool ConsumeFieldValue(Message* message, 336 const Reflection* reflection, 337 const FieldDescriptor* field) { 338 339 // Define an easy to use macro for setting fields. This macro checks 340 // to see if the field is repeated (in which case we need to use the Add 341 // methods or not (in which case we need to use the Set methods). 342 #define SET_FIELD(CPPTYPE, VALUE) \ 343 if (field->is_repeated()) { \ 344 reflection->Add##CPPTYPE(message, field, VALUE); \ 345 } else { \ 346 reflection->Set##CPPTYPE(message, field, VALUE); \ 347 } \ 348 349 switch(field->cpp_type()) { 350 case FieldDescriptor::CPPTYPE_INT32: { 351 int64 value; 352 DO(ConsumeSignedInteger(&value, kint32max)); 353 SET_FIELD(Int32, static_cast<int32>(value)); 354 break; 355 } 356 357 case FieldDescriptor::CPPTYPE_UINT32: { 358 uint64 value; 359 DO(ConsumeUnsignedInteger(&value, kuint32max)); 360 SET_FIELD(UInt32, static_cast<uint32>(value)); 361 break; 362 } 363 364 case FieldDescriptor::CPPTYPE_INT64: { 365 int64 value; 366 DO(ConsumeSignedInteger(&value, kint64max)); 367 SET_FIELD(Int64, value); 368 break; 369 } 370 371 case FieldDescriptor::CPPTYPE_UINT64: { 372 uint64 value; 373 DO(ConsumeUnsignedInteger(&value, kuint64max)); 374 SET_FIELD(UInt64, value); 375 break; 376 } 377 378 case FieldDescriptor::CPPTYPE_FLOAT: { 379 double value; 380 DO(ConsumeDouble(&value)); 381 SET_FIELD(Float, static_cast<float>(value)); 382 break; 383 } 384 385 case FieldDescriptor::CPPTYPE_DOUBLE: { 386 double value; 387 DO(ConsumeDouble(&value)); 388 SET_FIELD(Double, value); 389 break; 390 } 391 392 case FieldDescriptor::CPPTYPE_STRING: { 393 string value; 394 DO(ConsumeString(&value)); 395 SET_FIELD(String, value); 396 break; 397 } 398 399 case FieldDescriptor::CPPTYPE_BOOL: { 400 string value; 401 DO(ConsumeIdentifier(&value)); 402 403 if (value == "true") { 404 SET_FIELD(Bool, true); 405 } else if (value == "false") { 406 SET_FIELD(Bool, false); 407 } else { 408 ReportError("Invalid value for boolean field \"" + field->name() 409 + "\". Value: \"" + value + "\"."); 410 return false; 411 } 412 break; 413 } 414 415 case FieldDescriptor::CPPTYPE_ENUM: { 416 string value; 417 DO(ConsumeIdentifier(&value)); 418 419 // Find the enumeration value. 420 const EnumDescriptor* enum_type = field->enum_type(); 421 const EnumValueDescriptor* enum_value 422 = enum_type->FindValueByName(value); 423 424 if (enum_value == NULL) { 425 ReportError("Unknown enumeration value of \"" + value + "\" for " 426 "field \"" + field->name() + "\"."); 427 return false; 428 } 429 430 SET_FIELD(Enum, enum_value); 431 break; 432 } 433 434 case FieldDescriptor::CPPTYPE_MESSAGE: { 435 // We should never get here. Put here instead of a default 436 // so that if new types are added, we get a nice compiler warning. 437 GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE"; 438 break; 439 } 440 } 441 #undef SET_FIELD 442 return true; 443 } 444 445 // Returns true if the current token's text is equal to that specified. 446 bool LookingAt(const string& text) { 447 return tokenizer_.current().text == text; 448 } 449 450 // Returns true if the current token's type is equal to that specified. 451 bool LookingAtType(io::Tokenizer::TokenType token_type) { 452 return tokenizer_.current().type == token_type; 453 } 454 455 // Consumes an identifier and saves its value in the identifier parameter. 456 // Returns false if the token is not of type IDENTFIER. 457 bool ConsumeIdentifier(string* identifier) { 458 if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 459 ReportError("Expected identifier."); 460 return false; 461 } 462 463 *identifier = tokenizer_.current().text; 464 465 tokenizer_.Next(); 466 return true; 467 } 468 469 // Consumes a string and saves its value in the text parameter. 470 // Returns false if the token is not of type STRING. 471 bool ConsumeString(string* text) { 472 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) { 473 ReportError("Expected string."); 474 return false; 475 } 476 477 text->clear(); 478 while (LookingAtType(io::Tokenizer::TYPE_STRING)) { 479 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text); 480 481 tokenizer_.Next(); 482 } 483 484 return true; 485 } 486 487 // Consumes a uint64 and saves its value in the value parameter. 488 // Returns false if the token is not of type INTEGER. 489 bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) { 490 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 491 ReportError("Expected integer."); 492 return false; 493 } 494 495 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, 496 max_value, value)) { 497 ReportError("Integer out of range."); 498 return false; 499 } 500 501 tokenizer_.Next(); 502 return true; 503 } 504 505 // Consumes an int64 and saves its value in the value parameter. 506 // Note that since the tokenizer does not support negative numbers, 507 // we actually may consume an additional token (for the minus sign) in this 508 // method. Returns false if the token is not an integer 509 // (signed or otherwise). 510 bool ConsumeSignedInteger(int64* value, uint64 max_value) { 511 bool negative = false; 512 513 if (TryConsume("-")) { 514 negative = true; 515 // Two's complement always allows one more negative integer than 516 // positive. 517 ++max_value; 518 } 519 520 uint64 unsigned_value; 521 522 DO(ConsumeUnsignedInteger(&unsigned_value, max_value)); 523 524 *value = static_cast<int64>(unsigned_value); 525 526 if (negative) { 527 *value = -*value; 528 } 529 530 return true; 531 } 532 533 // Consumes a double and saves its value in the value parameter. 534 // Note that since the tokenizer does not support negative numbers, 535 // we actually may consume an additional token (for the minus sign) in this 536 // method. Returns false if the token is not a double 537 // (signed or otherwise). 538 bool ConsumeDouble(double* value) { 539 bool negative = false; 540 541 if (TryConsume("-")) { 542 negative = true; 543 } 544 545 // A double can actually be an integer, according to the tokenizer. 546 // Therefore, we must check both cases here. 547 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 548 // We have found an integer value for the double. 549 uint64 integer_value; 550 DO(ConsumeUnsignedInteger(&integer_value, kuint64max)); 551 552 *value = static_cast<double>(integer_value); 553 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) { 554 // We have found a float value for the double. 555 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text); 556 557 // Mark the current token as consumed. 558 tokenizer_.Next(); 559 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 560 string text = tokenizer_.current().text; 561 LowerString(&text); 562 if (text == "inf" || text == "infinity") { 563 *value = std::numeric_limits<double>::infinity(); 564 tokenizer_.Next(); 565 } else if (text == "nan") { 566 *value = std::numeric_limits<double>::quiet_NaN(); 567 tokenizer_.Next(); 568 } else { 569 ReportError("Expected double."); 570 return false; 571 } 572 } else { 573 ReportError("Expected double."); 574 return false; 575 } 576 577 if (negative) { 578 *value = -*value; 579 } 580 581 return true; 582 } 583 584 // Consumes a token and confirms that it matches that specified in the 585 // value parameter. Returns false if the token found does not match that 586 // which was specified. 587 bool Consume(const string& value) { 588 const string& current_value = tokenizer_.current().text; 589 590 if (current_value != value) { 591 ReportError("Expected \"" + value + "\", found \"" + current_value 592 + "\"."); 593 return false; 594 } 595 596 tokenizer_.Next(); 597 598 return true; 599 } 600 601 // Attempts to consume the supplied value. Returns false if a the 602 // token found does not match the value specified. 603 bool TryConsume(const string& value) { 604 if (tokenizer_.current().text == value) { 605 tokenizer_.Next(); 606 return true; 607 } else { 608 return false; 609 } 610 } 611 612 // An internal instance of the Tokenizer's error collector, used to 613 // collect any base-level parse errors and feed them to the ParserImpl. 614 class ParserErrorCollector : public io::ErrorCollector { 615 public: 616 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) : 617 parser_(parser) { } 618 619 virtual ~ParserErrorCollector() { }; 620 621 virtual void AddError(int line, int column, const string& message) { 622 parser_->ReportError(line, column, message); 623 } 624 625 virtual void AddWarning(int line, int column, const string& message) { 626 parser_->ReportWarning(line, column, message); 627 } 628 629 private: 630 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector); 631 TextFormat::Parser::ParserImpl* parser_; 632 }; 633 634 io::ErrorCollector* error_collector_; 635 ParserErrorCollector tokenizer_error_collector_; 636 io::Tokenizer tokenizer_; 637 const Descriptor* root_message_type_; 638 SingularOverwritePolicy singular_overwrite_policy_; 639 bool had_errors_; 640 }; 641 642 #undef DO 643 644 // =========================================================================== 645 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted 646 // from the Printer found in //google/protobuf/io/printer.h 647 class TextFormat::Printer::TextGenerator { 648 public: 649 explicit TextGenerator(io::ZeroCopyOutputStream* output, 650 int initial_indent_level) 651 : output_(output), 652 buffer_(NULL), 653 buffer_size_(0), 654 at_start_of_line_(true), 655 failed_(false), 656 indent_(""), 657 initial_indent_level_(initial_indent_level) { 658 indent_.resize(initial_indent_level_ * 2, ' '); 659 } 660 661 ~TextGenerator() { 662 // Only BackUp() if we're sure we've successfully called Next() at least 663 // once. 664 if (buffer_size_ > 0) { 665 output_->BackUp(buffer_size_); 666 } 667 } 668 669 // Indent text by two spaces. After calling Indent(), two spaces will be 670 // inserted at the beginning of each line of text. Indent() may be called 671 // multiple times to produce deeper indents. 672 void Indent() { 673 indent_ += " "; 674 } 675 676 // Reduces the current indent level by two spaces, or crashes if the indent 677 // level is zero. 678 void Outdent() { 679 if (indent_.empty() || 680 indent_.size() < initial_indent_level_ * 2) { 681 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent()."; 682 return; 683 } 684 685 indent_.resize(indent_.size() - 2); 686 } 687 688 // Print text to the output stream. 689 void Print(const string& str) { 690 Print(str.data(), str.size()); 691 } 692 693 // Print text to the output stream. 694 void Print(const char* text) { 695 Print(text, strlen(text)); 696 } 697 698 // Print text to the output stream. 699 void Print(const char* text, int size) { 700 int pos = 0; // The number of bytes we've written so far. 701 702 for (int i = 0; i < size; i++) { 703 if (text[i] == '\n') { 704 // Saw newline. If there is more text, we may need to insert an indent 705 // here. So, write what we have so far, including the '\n'. 706 Write(text + pos, i - pos + 1); 707 pos = i + 1; 708 709 // Setting this true will cause the next Write() to insert an indent 710 // first. 711 at_start_of_line_ = true; 712 } 713 } 714 715 // Write the rest. 716 Write(text + pos, size - pos); 717 } 718 719 // True if any write to the underlying stream failed. (We don't just 720 // crash in this case because this is an I/O failure, not a programming 721 // error.) 722 bool failed() const { return failed_; } 723 724 private: 725 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator); 726 727 void Write(const char* data, int size) { 728 if (failed_) return; 729 if (size == 0) return; 730 731 if (at_start_of_line_) { 732 // Insert an indent. 733 at_start_of_line_ = false; 734 Write(indent_.data(), indent_.size()); 735 if (failed_) return; 736 } 737 738 while (size > buffer_size_) { 739 // Data exceeds space in the buffer. Copy what we can and request a 740 // new buffer. 741 memcpy(buffer_, data, buffer_size_); 742 data += buffer_size_; 743 size -= buffer_size_; 744 void* void_buffer; 745 failed_ = !output_->Next(&void_buffer, &buffer_size_); 746 if (failed_) return; 747 buffer_ = reinterpret_cast<char*>(void_buffer); 748 } 749 750 // Buffer is big enough to receive the data; copy it. 751 memcpy(buffer_, data, size); 752 buffer_ += size; 753 buffer_size_ -= size; 754 } 755 756 io::ZeroCopyOutputStream* const output_; 757 char* buffer_; 758 int buffer_size_; 759 bool at_start_of_line_; 760 bool failed_; 761 762 string indent_; 763 int initial_indent_level_; 764 }; 765 766 // =========================================================================== 767 768 TextFormat::Parser::Parser() 769 : error_collector_(NULL), 770 allow_partial_(false) {} 771 772 TextFormat::Parser::~Parser() {} 773 774 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input, 775 Message* output) { 776 output->Clear(); 777 ParserImpl parser(output->GetDescriptor(), input, error_collector_, 778 ParserImpl::FORBID_SINGULAR_OVERWRITES); 779 return MergeUsingImpl(input, output, &parser); 780 } 781 782 bool TextFormat::Parser::ParseFromString(const string& input, 783 Message* output) { 784 io::ArrayInputStream input_stream(input.data(), input.size()); 785 return Parse(&input_stream, output); 786 } 787 788 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input, 789 Message* output) { 790 ParserImpl parser(output->GetDescriptor(), input, error_collector_, 791 ParserImpl::ALLOW_SINGULAR_OVERWRITES); 792 return MergeUsingImpl(input, output, &parser); 793 } 794 795 bool TextFormat::Parser::MergeFromString(const string& input, 796 Message* output) { 797 io::ArrayInputStream input_stream(input.data(), input.size()); 798 return Merge(&input_stream, output); 799 } 800 801 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input, 802 Message* output, 803 ParserImpl* parser_impl) { 804 if (!parser_impl->Parse(output)) return false; 805 if (!allow_partial_ && !output->IsInitialized()) { 806 vector<string> missing_fields; 807 output->FindInitializationErrors(&missing_fields); 808 parser_impl->ReportError(-1, 0, "Message missing required fields: " + 809 JoinStrings(missing_fields, ", ")); 810 return false; 811 } 812 return true; 813 } 814 815 bool TextFormat::Parser::ParseFieldValueFromString( 816 const string& input, 817 const FieldDescriptor* field, 818 Message* output) { 819 io::ArrayInputStream input_stream(input.data(), input.size()); 820 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_, 821 ParserImpl::ALLOW_SINGULAR_OVERWRITES); 822 return parser.ParseField(field, output); 823 } 824 825 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input, 826 Message* output) { 827 return Parser().Parse(input, output); 828 } 829 830 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input, 831 Message* output) { 832 return Parser().Merge(input, output); 833 } 834 835 /* static */ bool TextFormat::ParseFromString(const string& input, 836 Message* output) { 837 return Parser().ParseFromString(input, output); 838 } 839 840 /* static */ bool TextFormat::MergeFromString(const string& input, 841 Message* output) { 842 return Parser().MergeFromString(input, output); 843 } 844 845 // =========================================================================== 846 847 TextFormat::Printer::Printer() 848 : initial_indent_level_(0), 849 single_line_mode_(false), 850 use_short_repeated_primitives_(false), 851 utf8_string_escaping_(false) {} 852 853 TextFormat::Printer::~Printer() {} 854 855 bool TextFormat::Printer::PrintToString(const Message& message, 856 string* output) { 857 GOOGLE_DCHECK(output) << "output specified is NULL"; 858 859 output->clear(); 860 io::StringOutputStream output_stream(output); 861 862 bool result = Print(message, &output_stream); 863 864 return result; 865 } 866 867 bool TextFormat::Printer::PrintUnknownFieldsToString( 868 const UnknownFieldSet& unknown_fields, 869 string* output) { 870 GOOGLE_DCHECK(output) << "output specified is NULL"; 871 872 output->clear(); 873 io::StringOutputStream output_stream(output); 874 return PrintUnknownFields(unknown_fields, &output_stream); 875 } 876 877 bool TextFormat::Printer::Print(const Message& message, 878 io::ZeroCopyOutputStream* output) { 879 TextGenerator generator(output, initial_indent_level_); 880 881 Print(message, generator); 882 883 // Output false if the generator failed internally. 884 return !generator.failed(); 885 } 886 887 bool TextFormat::Printer::PrintUnknownFields( 888 const UnknownFieldSet& unknown_fields, 889 io::ZeroCopyOutputStream* output) { 890 TextGenerator generator(output, initial_indent_level_); 891 892 PrintUnknownFields(unknown_fields, generator); 893 894 // Output false if the generator failed internally. 895 return !generator.failed(); 896 } 897 898 void TextFormat::Printer::Print(const Message& message, 899 TextGenerator& generator) { 900 const Reflection* reflection = message.GetReflection(); 901 vector<const FieldDescriptor*> fields; 902 reflection->ListFields(message, &fields); 903 for (int i = 0; i < fields.size(); i++) { 904 PrintField(message, reflection, fields[i], generator); 905 } 906 PrintUnknownFields(reflection->GetUnknownFields(message), generator); 907 } 908 909 void TextFormat::Printer::PrintFieldValueToString( 910 const Message& message, 911 const FieldDescriptor* field, 912 int index, 913 string* output) { 914 915 GOOGLE_DCHECK(output) << "output specified is NULL"; 916 917 output->clear(); 918 io::StringOutputStream output_stream(output); 919 TextGenerator generator(&output_stream, initial_indent_level_); 920 921 PrintFieldValue(message, message.GetReflection(), field, index, generator); 922 } 923 924 void TextFormat::Printer::PrintField(const Message& message, 925 const Reflection* reflection, 926 const FieldDescriptor* field, 927 TextGenerator& generator) { 928 if (use_short_repeated_primitives_ && 929 field->is_repeated() && 930 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING && 931 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) { 932 PrintShortRepeatedField(message, reflection, field, generator); 933 return; 934 } 935 936 int count = 0; 937 938 if (field->is_repeated()) { 939 count = reflection->FieldSize(message, field); 940 } else if (reflection->HasField(message, field)) { 941 count = 1; 942 } 943 944 for (int j = 0; j < count; ++j) { 945 PrintFieldName(message, reflection, field, generator); 946 947 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 948 if (single_line_mode_) { 949 generator.Print(" { "); 950 } else { 951 generator.Print(" {\n"); 952 generator.Indent(); 953 } 954 } else { 955 generator.Print(": "); 956 } 957 958 // Write the field value. 959 int field_index = j; 960 if (!field->is_repeated()) { 961 field_index = -1; 962 } 963 964 PrintFieldValue(message, reflection, field, field_index, generator); 965 966 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 967 if (single_line_mode_) { 968 generator.Print("} "); 969 } else { 970 generator.Outdent(); 971 generator.Print("}\n"); 972 } 973 } else { 974 if (single_line_mode_) { 975 generator.Print(" "); 976 } else { 977 generator.Print("\n"); 978 } 979 } 980 } 981 } 982 983 void TextFormat::Printer::PrintShortRepeatedField(const Message& message, 984 const Reflection* reflection, 985 const FieldDescriptor* field, 986 TextGenerator& generator) { 987 // Print primitive repeated field in short form. 988 PrintFieldName(message, reflection, field, generator); 989 990 int size = reflection->FieldSize(message, field); 991 generator.Print(": ["); 992 for (int i = 0; i < size; i++) { 993 if (i > 0) generator.Print(", "); 994 PrintFieldValue(message, reflection, field, i, generator); 995 } 996 if (single_line_mode_) { 997 generator.Print("] "); 998 } else { 999 generator.Print("]\n"); 1000 } 1001 } 1002 1003 void TextFormat::Printer::PrintFieldName(const Message& message, 1004 const Reflection* reflection, 1005 const FieldDescriptor* field, 1006 TextGenerator& generator) { 1007 if (field->is_extension()) { 1008 generator.Print("["); 1009 // We special-case MessageSet elements for compatibility with proto1. 1010 if (field->containing_type()->options().message_set_wire_format() 1011 && field->type() == FieldDescriptor::TYPE_MESSAGE 1012 && field->is_optional() 1013 && field->extension_scope() == field->message_type()) { 1014 generator.Print(field->message_type()->full_name()); 1015 } else { 1016 generator.Print(field->full_name()); 1017 } 1018 generator.Print("]"); 1019 } else { 1020 if (field->type() == FieldDescriptor::TYPE_GROUP) { 1021 // Groups must be serialized with their original capitalization. 1022 generator.Print(field->message_type()->name()); 1023 } else { 1024 generator.Print(field->name()); 1025 } 1026 } 1027 } 1028 1029 void TextFormat::Printer::PrintFieldValue( 1030 const Message& message, 1031 const Reflection* reflection, 1032 const FieldDescriptor* field, 1033 int index, 1034 TextGenerator& generator) { 1035 GOOGLE_DCHECK(field->is_repeated() || (index == -1)) 1036 << "Index must be -1 for non-repeated fields"; 1037 1038 switch (field->cpp_type()) { 1039 #define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING) \ 1040 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 1041 generator.Print(TO_STRING(field->is_repeated() ? \ 1042 reflection->GetRepeated##METHOD(message, field, index) : \ 1043 reflection->Get##METHOD(message, field))); \ 1044 break; \ 1045 1046 OUTPUT_FIELD( INT32, Int32, SimpleItoa); 1047 OUTPUT_FIELD( INT64, Int64, SimpleItoa); 1048 OUTPUT_FIELD(UINT32, UInt32, SimpleItoa); 1049 OUTPUT_FIELD(UINT64, UInt64, SimpleItoa); 1050 OUTPUT_FIELD( FLOAT, Float, SimpleFtoa); 1051 OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa); 1052 #undef OUTPUT_FIELD 1053 1054 case FieldDescriptor::CPPTYPE_STRING: { 1055 string scratch; 1056 const string& value = field->is_repeated() ? 1057 reflection->GetRepeatedStringReference( 1058 message, field, index, &scratch) : 1059 reflection->GetStringReference(message, field, &scratch); 1060 1061 generator.Print("\""); 1062 if (utf8_string_escaping_) { 1063 generator.Print(strings::Utf8SafeCEscape(value)); 1064 } else { 1065 generator.Print(CEscape(value)); 1066 } 1067 generator.Print("\""); 1068 1069 break; 1070 } 1071 1072 case FieldDescriptor::CPPTYPE_BOOL: 1073 if (field->is_repeated()) { 1074 generator.Print(reflection->GetRepeatedBool(message, field, index) 1075 ? "true" : "false"); 1076 } else { 1077 generator.Print(reflection->GetBool(message, field) 1078 ? "true" : "false"); 1079 } 1080 break; 1081 1082 case FieldDescriptor::CPPTYPE_ENUM: 1083 generator.Print(field->is_repeated() ? 1084 reflection->GetRepeatedEnum(message, field, index)->name() : 1085 reflection->GetEnum(message, field)->name()); 1086 break; 1087 1088 case FieldDescriptor::CPPTYPE_MESSAGE: 1089 Print(field->is_repeated() ? 1090 reflection->GetRepeatedMessage(message, field, index) : 1091 reflection->GetMessage(message, field), 1092 generator); 1093 break; 1094 } 1095 } 1096 1097 /* static */ bool TextFormat::Print(const Message& message, 1098 io::ZeroCopyOutputStream* output) { 1099 return Printer().Print(message, output); 1100 } 1101 1102 /* static */ bool TextFormat::PrintUnknownFields( 1103 const UnknownFieldSet& unknown_fields, 1104 io::ZeroCopyOutputStream* output) { 1105 return Printer().PrintUnknownFields(unknown_fields, output); 1106 } 1107 1108 /* static */ bool TextFormat::PrintToString( 1109 const Message& message, string* output) { 1110 return Printer().PrintToString(message, output); 1111 } 1112 1113 /* static */ bool TextFormat::PrintUnknownFieldsToString( 1114 const UnknownFieldSet& unknown_fields, string* output) { 1115 return Printer().PrintUnknownFieldsToString(unknown_fields, output); 1116 } 1117 1118 /* static */ void TextFormat::PrintFieldValueToString( 1119 const Message& message, 1120 const FieldDescriptor* field, 1121 int index, 1122 string* output) { 1123 return Printer().PrintFieldValueToString(message, field, index, output); 1124 } 1125 1126 /* static */ bool TextFormat::ParseFieldValueFromString( 1127 const string& input, 1128 const FieldDescriptor* field, 1129 Message* message) { 1130 return Parser().ParseFieldValueFromString(input, field, message); 1131 } 1132 1133 // Prints an integer as hex with a fixed number of digits dependent on the 1134 // integer type. 1135 template<typename IntType> 1136 static string PaddedHex(IntType value) { 1137 string result; 1138 result.reserve(sizeof(value) * 2); 1139 for (int i = sizeof(value) * 2 - 1; i >= 0; i--) { 1140 result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F)); 1141 } 1142 return result; 1143 } 1144 1145 void TextFormat::Printer::PrintUnknownFields( 1146 const UnknownFieldSet& unknown_fields, TextGenerator& generator) { 1147 for (int i = 0; i < unknown_fields.field_count(); i++) { 1148 const UnknownField& field = unknown_fields.field(i); 1149 string field_number = SimpleItoa(field.number()); 1150 1151 switch (field.type()) { 1152 case UnknownField::TYPE_VARINT: 1153 generator.Print(field_number); 1154 generator.Print(": "); 1155 generator.Print(SimpleItoa(field.varint())); 1156 if (single_line_mode_) { 1157 generator.Print(" "); 1158 } else { 1159 generator.Print("\n"); 1160 } 1161 break; 1162 case UnknownField::TYPE_FIXED32: { 1163 generator.Print(field_number); 1164 generator.Print(": 0x"); 1165 char buffer[kFastToBufferSize]; 1166 generator.Print(FastHex32ToBuffer(field.fixed32(), buffer)); 1167 if (single_line_mode_) { 1168 generator.Print(" "); 1169 } else { 1170 generator.Print("\n"); 1171 } 1172 break; 1173 } 1174 case UnknownField::TYPE_FIXED64: { 1175 generator.Print(field_number); 1176 generator.Print(": 0x"); 1177 char buffer[kFastToBufferSize]; 1178 generator.Print(FastHex64ToBuffer(field.fixed64(), buffer)); 1179 if (single_line_mode_) { 1180 generator.Print(" "); 1181 } else { 1182 generator.Print("\n"); 1183 } 1184 break; 1185 } 1186 case UnknownField::TYPE_LENGTH_DELIMITED: { 1187 generator.Print(field_number); 1188 const string& value = field.length_delimited(); 1189 UnknownFieldSet embedded_unknown_fields; 1190 if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) { 1191 // This field is parseable as a Message. 1192 // So it is probably an embedded message. 1193 if (single_line_mode_) { 1194 generator.Print(" { "); 1195 } else { 1196 generator.Print(" {\n"); 1197 generator.Indent(); 1198 } 1199 PrintUnknownFields(embedded_unknown_fields, generator); 1200 if (single_line_mode_) { 1201 generator.Print("} "); 1202 } else { 1203 generator.Outdent(); 1204 generator.Print("}\n"); 1205 } 1206 } else { 1207 // This field is not parseable as a Message. 1208 // So it is probably just a plain string. 1209 generator.Print(": \""); 1210 generator.Print(CEscape(value)); 1211 generator.Print("\""); 1212 if (single_line_mode_) { 1213 generator.Print(" "); 1214 } else { 1215 generator.Print("\n"); 1216 } 1217 } 1218 break; 1219 } 1220 case UnknownField::TYPE_GROUP: 1221 generator.Print(field_number); 1222 if (single_line_mode_) { 1223 generator.Print(" { "); 1224 } else { 1225 generator.Print(" {\n"); 1226 generator.Indent(); 1227 } 1228 PrintUnknownFields(field.group(), generator); 1229 if (single_line_mode_) { 1230 generator.Print("} "); 1231 } else { 1232 generator.Outdent(); 1233 generator.Print("}\n"); 1234 } 1235 break; 1236 } 1237 } 1238 } 1239 1240 } // namespace protobuf 1241 } // namespace google 1242