1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr (at) google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 35 #include <algorithm> 36 #include <float.h> 37 #include <math.h> 38 #include <stdio.h> 39 #include <stack> 40 #include <limits> 41 #include <vector> 42 43 #include <google/protobuf/text_format.h> 44 45 #include <google/protobuf/descriptor.h> 46 #include <google/protobuf/dynamic_message.h> 47 #include <google/protobuf/repeated_field.h> 48 #include <google/protobuf/wire_format_lite.h> 49 #include <google/protobuf/io/strtod.h> 50 #include <google/protobuf/io/coded_stream.h> 51 #include <google/protobuf/io/zero_copy_stream.h> 52 #include <google/protobuf/io/zero_copy_stream_impl.h> 53 #include <google/protobuf/unknown_field_set.h> 54 #include <google/protobuf/descriptor.pb.h> 55 #include <google/protobuf/io/tokenizer.h> 56 #include <google/protobuf/any.h> 57 #include <google/protobuf/stubs/stringprintf.h> 58 #include <google/protobuf/stubs/strutil.h> 59 #include <google/protobuf/stubs/map_util.h> 60 #include <google/protobuf/stubs/stl_util.h> 61 62 namespace google { 63 namespace protobuf { 64 65 namespace { 66 67 inline bool IsHexNumber(const string& str) { 68 return (str.length() >= 2 && str[0] == '0' && 69 (str[1] == 'x' || str[1] == 'X')); 70 } 71 72 inline bool IsOctNumber(const string& str) { 73 return (str.length() >= 2 && str[0] == '0' && 74 (str[1] >= '0' && str[1] < '8')); 75 } 76 77 inline bool GetAnyFieldDescriptors(const Message& message, 78 const FieldDescriptor** type_url_field, 79 const FieldDescriptor** value_field) { 80 const Descriptor* descriptor = message.GetDescriptor(); 81 *type_url_field = descriptor->FindFieldByNumber(1); 82 *value_field = descriptor->FindFieldByNumber(2); 83 return (*type_url_field != NULL && 84 (*type_url_field)->type() == FieldDescriptor::TYPE_STRING && 85 *value_field != NULL && 86 (*value_field)->type() == FieldDescriptor::TYPE_BYTES); 87 } 88 89 } // namespace 90 91 string Message::DebugString() const { 92 string debug_string; 93 94 TextFormat::Printer printer; 95 printer.SetExpandAny(true); 96 97 printer.PrintToString(*this, &debug_string); 98 99 return debug_string; 100 } 101 102 string Message::ShortDebugString() const { 103 string debug_string; 104 105 TextFormat::Printer printer; 106 printer.SetSingleLineMode(true); 107 printer.SetExpandAny(true); 108 109 printer.PrintToString(*this, &debug_string); 110 // Single line mode currently might have an extra space at the end. 111 if (debug_string.size() > 0 && 112 debug_string[debug_string.size() - 1] == ' ') { 113 debug_string.resize(debug_string.size() - 1); 114 } 115 116 return debug_string; 117 } 118 119 string Message::Utf8DebugString() const { 120 string debug_string; 121 122 TextFormat::Printer printer; 123 printer.SetUseUtf8StringEscaping(true); 124 printer.SetExpandAny(true); 125 126 printer.PrintToString(*this, &debug_string); 127 128 return debug_string; 129 } 130 131 void Message::PrintDebugString() const { 132 printf("%s", DebugString().c_str()); 133 } 134 135 136 // =========================================================================== 137 // Implementation of the parse information tree class. 138 TextFormat::ParseInfoTree::ParseInfoTree() { } 139 140 TextFormat::ParseInfoTree::~ParseInfoTree() { 141 // Remove any nested information trees, as they are owned by this tree. 142 for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) { 143 STLDeleteElements(&(it->second)); 144 } 145 } 146 147 void TextFormat::ParseInfoTree::RecordLocation( 148 const FieldDescriptor* field, 149 TextFormat::ParseLocation location) { 150 locations_[field].push_back(location); 151 } 152 153 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested( 154 const FieldDescriptor* field) { 155 // Owned by us in the map. 156 TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree(); 157 vector<TextFormat::ParseInfoTree*>* trees = &nested_[field]; 158 GOOGLE_CHECK(trees); 159 trees->push_back(instance); 160 return instance; 161 } 162 163 void CheckFieldIndex(const FieldDescriptor* field, int index) { 164 if (field == NULL) { return; } 165 166 if (field->is_repeated() && index == -1) { 167 GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. " 168 << "Field: " << field->name(); 169 } else if (!field->is_repeated() && index != -1) { 170 GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields." 171 << "Field: " << field->name(); 172 } 173 } 174 175 TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation( 176 const FieldDescriptor* field, int index) const { 177 CheckFieldIndex(field, index); 178 if (index == -1) { index = 0; } 179 180 const vector<TextFormat::ParseLocation>* locations = 181 FindOrNull(locations_, field); 182 if (locations == NULL || index >= locations->size()) { 183 return TextFormat::ParseLocation(); 184 } 185 186 return (*locations)[index]; 187 } 188 189 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested( 190 const FieldDescriptor* field, int index) const { 191 CheckFieldIndex(field, index); 192 if (index == -1) { index = 0; } 193 194 const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field); 195 if (trees == NULL || index >= trees->size()) { 196 return NULL; 197 } 198 199 return (*trees)[index]; 200 } 201 202 203 // =========================================================================== 204 // Internal class for parsing an ASCII representation of a Protocol Message. 205 // This class makes use of the Protocol Message compiler's tokenizer found 206 // in //google/protobuf/io/tokenizer.h. Note that class's Parse 207 // method is *not* thread-safe and should only be used in a single thread at 208 // a time. 209 210 // Makes code slightly more readable. The meaning of "DO(foo)" is 211 // "Execute foo and fail if it fails.", where failure is indicated by 212 // returning false. Borrowed from parser.cc (Thanks Kenton!). 213 #define DO(STATEMENT) if (STATEMENT) {} else return false 214 215 class TextFormat::Parser::ParserImpl { 216 public: 217 218 // Determines if repeated values for non-repeated fields and 219 // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a 220 // required/optional field named "foo", or "baz: 1 qux: 2" 221 // where "baz" and "qux" are members of the same oneof. 222 enum SingularOverwritePolicy { 223 ALLOW_SINGULAR_OVERWRITES = 0, // the last value is retained 224 FORBID_SINGULAR_OVERWRITES = 1, // an error is issued 225 }; 226 227 ParserImpl(const Descriptor* root_message_type, 228 io::ZeroCopyInputStream* input_stream, 229 io::ErrorCollector* error_collector, 230 TextFormat::Finder* finder, 231 ParseInfoTree* parse_info_tree, 232 SingularOverwritePolicy singular_overwrite_policy, 233 bool allow_case_insensitive_field, 234 bool allow_unknown_field, 235 bool allow_unknown_enum, 236 bool allow_field_number, 237 bool allow_relaxed_whitespace) 238 : error_collector_(error_collector), 239 finder_(finder), 240 parse_info_tree_(parse_info_tree), 241 tokenizer_error_collector_(this), 242 tokenizer_(input_stream, &tokenizer_error_collector_), 243 root_message_type_(root_message_type), 244 singular_overwrite_policy_(singular_overwrite_policy), 245 allow_case_insensitive_field_(allow_case_insensitive_field), 246 allow_unknown_field_(allow_unknown_field), 247 allow_unknown_enum_(allow_unknown_enum), 248 allow_field_number_(allow_field_number), 249 had_errors_(false) { 250 // For backwards-compatibility with proto1, we need to allow the 'f' suffix 251 // for floats. 252 tokenizer_.set_allow_f_after_float(true); 253 254 // '#' starts a comment. 255 tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE); 256 257 if (allow_relaxed_whitespace) { 258 tokenizer_.set_require_space_after_number(false); 259 tokenizer_.set_allow_multiline_strings(true); 260 } 261 262 // Consume the starting token. 263 tokenizer_.Next(); 264 } 265 ~ParserImpl() { } 266 267 // Parses the ASCII representation specified in input and saves the 268 // information into the output pointer (a Message). Returns 269 // false if an error occurs (an error will also be logged to 270 // GOOGLE_LOG(ERROR)). 271 bool Parse(Message* output) { 272 // Consume fields until we cannot do so anymore. 273 while (true) { 274 if (LookingAtType(io::Tokenizer::TYPE_END)) { 275 return !had_errors_; 276 } 277 278 DO(ConsumeField(output)); 279 } 280 } 281 282 bool ParseField(const FieldDescriptor* field, Message* output) { 283 bool suc; 284 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 285 suc = ConsumeFieldMessage(output, output->GetReflection(), field); 286 } else { 287 suc = ConsumeFieldValue(output, output->GetReflection(), field); 288 } 289 return suc && LookingAtType(io::Tokenizer::TYPE_END); 290 } 291 292 void ReportError(int line, int col, const string& message) { 293 had_errors_ = true; 294 if (error_collector_ == NULL) { 295 if (line >= 0) { 296 GOOGLE_LOG(ERROR) << "Error parsing text-format " 297 << root_message_type_->full_name() 298 << ": " << (line + 1) << ":" 299 << (col + 1) << ": " << message; 300 } else { 301 GOOGLE_LOG(ERROR) << "Error parsing text-format " 302 << root_message_type_->full_name() 303 << ": " << message; 304 } 305 } else { 306 error_collector_->AddError(line, col, message); 307 } 308 } 309 310 void ReportWarning(int line, int col, const string& message) { 311 if (error_collector_ == NULL) { 312 if (line >= 0) { 313 GOOGLE_LOG(WARNING) << "Warning parsing text-format " 314 << root_message_type_->full_name() 315 << ": " << (line + 1) << ":" 316 << (col + 1) << ": " << message; 317 } else { 318 GOOGLE_LOG(WARNING) << "Warning parsing text-format " 319 << root_message_type_->full_name() 320 << ": " << message; 321 } 322 } else { 323 error_collector_->AddWarning(line, col, message); 324 } 325 } 326 327 private: 328 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl); 329 330 // Reports an error with the given message with information indicating 331 // the position (as derived from the current token). 332 void ReportError(const string& message) { 333 ReportError(tokenizer_.current().line, tokenizer_.current().column, 334 message); 335 } 336 337 // Reports a warning with the given message with information indicating 338 // the position (as derived from the current token). 339 void ReportWarning(const string& message) { 340 ReportWarning(tokenizer_.current().line, tokenizer_.current().column, 341 message); 342 } 343 344 // Consumes the specified message with the given starting delimiter. 345 // This method checks to see that the end delimiter at the conclusion of 346 // the consumption matches the starting delimiter passed in here. 347 bool ConsumeMessage(Message* message, const string delimiter) { 348 while (!LookingAt(">") && !LookingAt("}")) { 349 DO(ConsumeField(message)); 350 } 351 352 // Confirm that we have a valid ending delimiter. 353 DO(Consume(delimiter)); 354 return true; 355 } 356 357 // Consume either "<" or "{". 358 bool ConsumeMessageDelimiter(string* delimiter) { 359 if (TryConsume("<")) { 360 *delimiter = ">"; 361 } else { 362 DO(Consume("{")); 363 *delimiter = "}"; 364 } 365 return true; 366 } 367 368 369 // Consumes the current field (as returned by the tokenizer) on the 370 // passed in message. 371 bool ConsumeField(Message* message) { 372 const Reflection* reflection = message->GetReflection(); 373 const Descriptor* descriptor = message->GetDescriptor(); 374 375 string field_name; 376 377 const FieldDescriptor* field = NULL; 378 int start_line = tokenizer_.current().line; 379 int start_column = tokenizer_.current().column; 380 381 const FieldDescriptor* any_type_url_field; 382 const FieldDescriptor* any_value_field; 383 if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field, 384 &any_value_field) && 385 TryConsume("[")) { 386 string full_type_name, prefix; 387 DO(ConsumeAnyTypeUrl(&full_type_name, &prefix)); 388 DO(Consume("]")); 389 TryConsume(":"); // ':' is optional between message labels and values. 390 string serialized_value; 391 DO(ConsumeAnyValue(full_type_name, 392 message->GetDescriptor()->file()->pool(), 393 &serialized_value)); 394 reflection->SetString( 395 message, any_type_url_field, 396 string(prefix + full_type_name)); 397 reflection->SetString(message, any_value_field, serialized_value); 398 return true; 399 } 400 if (TryConsume("[")) { 401 // Extension. 402 DO(ConsumeFullTypeName(&field_name)); 403 DO(Consume("]")); 404 405 field = (finder_ != NULL 406 ? finder_->FindExtension(message, field_name) 407 : reflection->FindKnownExtensionByName(field_name)); 408 409 if (field == NULL) { 410 if (!allow_unknown_field_) { 411 ReportError("Extension \"" + field_name + "\" is not defined or " 412 "is not an extension of \"" + 413 descriptor->full_name() + "\"."); 414 return false; 415 } else { 416 ReportWarning("Extension \"" + field_name + "\" is not defined or " 417 "is not an extension of \"" + 418 descriptor->full_name() + "\"."); 419 } 420 } 421 } else { 422 DO(ConsumeIdentifier(&field_name)); 423 424 int32 field_number; 425 if (allow_field_number_ && safe_strto32(field_name, &field_number)) { 426 if (descriptor->IsExtensionNumber(field_number)) { 427 field = reflection->FindKnownExtensionByNumber(field_number); 428 } else { 429 field = descriptor->FindFieldByNumber(field_number); 430 } 431 } else { 432 field = descriptor->FindFieldByName(field_name); 433 // Group names are expected to be capitalized as they appear in the 434 // .proto file, which actually matches their type names, not their 435 // field names. 436 if (field == NULL) { 437 string lower_field_name = field_name; 438 LowerString(&lower_field_name); 439 field = descriptor->FindFieldByName(lower_field_name); 440 // If the case-insensitive match worked but the field is NOT a group, 441 if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) { 442 field = NULL; 443 } 444 } 445 // Again, special-case group names as described above. 446 if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP 447 && field->message_type()->name() != field_name) { 448 field = NULL; 449 } 450 451 if (field == NULL && allow_case_insensitive_field_) { 452 string lower_field_name = field_name; 453 LowerString(&lower_field_name); 454 field = descriptor->FindFieldByLowercaseName(lower_field_name); 455 } 456 } 457 458 if (field == NULL) { 459 if (!allow_unknown_field_) { 460 ReportError("Message type \"" + descriptor->full_name() + 461 "\" has no field named \"" + field_name + "\"."); 462 return false; 463 } else { 464 ReportWarning("Message type \"" + descriptor->full_name() + 465 "\" has no field named \"" + field_name + "\"."); 466 } 467 } 468 } 469 470 // Skips unknown field. 471 if (field == NULL) { 472 GOOGLE_CHECK(allow_unknown_field_); 473 // Try to guess the type of this field. 474 // If this field is not a message, there should be a ":" between the 475 // field name and the field value and also the field value should not 476 // start with "{" or "<" which indicates the beginning of a message body. 477 // If there is no ":" or there is a "{" or "<" after ":", this field has 478 // to be a message or the input is ill-formed. 479 if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { 480 return SkipFieldValue(); 481 } else { 482 return SkipFieldMessage(); 483 } 484 } 485 486 if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) { 487 // Fail if the field is not repeated and it has already been specified. 488 if (!field->is_repeated() && reflection->HasField(*message, field)) { 489 ReportError("Non-repeated field \"" + field_name + 490 "\" is specified multiple times."); 491 return false; 492 } 493 // Fail if the field is a member of a oneof and another member has already 494 // been specified. 495 const OneofDescriptor* oneof = field->containing_oneof(); 496 if (oneof != NULL && reflection->HasOneof(*message, oneof)) { 497 const FieldDescriptor* other_field = 498 reflection->GetOneofFieldDescriptor(*message, oneof); 499 ReportError("Field \"" + field_name + "\" is specified along with " 500 "field \"" + other_field->name() + "\", another member " 501 "of oneof \"" + oneof->name() + "\"."); 502 return false; 503 } 504 } 505 506 // Perform special handling for embedded message types. 507 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 508 // ':' is optional here. 509 TryConsume(":"); 510 } else { 511 // ':' is required here. 512 DO(Consume(":")); 513 } 514 515 if (field->is_repeated() && TryConsume("[")) { 516 // Short repeated format, e.g. "foo: [1, 2, 3]" 517 while (true) { 518 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 519 // Perform special handling for embedded message types. 520 DO(ConsumeFieldMessage(message, reflection, field)); 521 } else { 522 DO(ConsumeFieldValue(message, reflection, field)); 523 } 524 if (TryConsume("]")) { 525 break; 526 } 527 DO(Consume(",")); 528 } 529 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 530 DO(ConsumeFieldMessage(message, reflection, field)); 531 } else { 532 DO(ConsumeFieldValue(message, reflection, field)); 533 } 534 535 // For historical reasons, fields may optionally be separated by commas or 536 // semicolons. 537 TryConsume(";") || TryConsume(","); 538 539 if (field->options().deprecated()) { 540 ReportWarning("text format contains deprecated field \"" 541 + field_name + "\""); 542 } 543 544 // If a parse info tree exists, add the location for the parsed 545 // field. 546 if (parse_info_tree_ != NULL) { 547 RecordLocation(parse_info_tree_, field, 548 ParseLocation(start_line, start_column)); 549 } 550 551 return true; 552 } 553 554 // Skips the next field including the field's name and value. 555 bool SkipField() { 556 string field_name; 557 if (TryConsume("[")) { 558 // Extension name. 559 DO(ConsumeFullTypeName(&field_name)); 560 DO(Consume("]")); 561 } else { 562 DO(ConsumeIdentifier(&field_name)); 563 } 564 565 // Try to guess the type of this field. 566 // If this field is not a message, there should be a ":" between the 567 // field name and the field value and also the field value should not 568 // start with "{" or "<" which indicates the beginning of a message body. 569 // If there is no ":" or there is a "{" or "<" after ":", this field has 570 // to be a message or the input is ill-formed. 571 if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) { 572 DO(SkipFieldValue()); 573 } else { 574 DO(SkipFieldMessage()); 575 } 576 // For historical reasons, fields may optionally be separated by commas or 577 // semicolons. 578 TryConsume(";") || TryConsume(","); 579 return true; 580 } 581 582 bool ConsumeFieldMessage(Message* message, 583 const Reflection* reflection, 584 const FieldDescriptor* field) { 585 586 // If the parse information tree is not NULL, create a nested one 587 // for the nested message. 588 ParseInfoTree* parent = parse_info_tree_; 589 if (parent != NULL) { 590 parse_info_tree_ = CreateNested(parent, field); 591 } 592 593 string delimiter; 594 DO(ConsumeMessageDelimiter(&delimiter)); 595 if (field->is_repeated()) { 596 DO(ConsumeMessage(reflection->AddMessage(message, field), delimiter)); 597 } else { 598 DO(ConsumeMessage(reflection->MutableMessage(message, field), 599 delimiter)); 600 } 601 602 // Reset the parse information tree. 603 parse_info_tree_ = parent; 604 return true; 605 } 606 607 // Skips the whole body of a message including the beginning delimiter and 608 // the ending delimiter. 609 bool SkipFieldMessage() { 610 string delimiter; 611 DO(ConsumeMessageDelimiter(&delimiter)); 612 while (!LookingAt(">") && !LookingAt("}")) { 613 DO(SkipField()); 614 } 615 DO(Consume(delimiter)); 616 return true; 617 } 618 619 bool ConsumeFieldValue(Message* message, 620 const Reflection* reflection, 621 const FieldDescriptor* field) { 622 623 // Define an easy to use macro for setting fields. This macro checks 624 // to see if the field is repeated (in which case we need to use the Add 625 // methods or not (in which case we need to use the Set methods). 626 #define SET_FIELD(CPPTYPE, VALUE) \ 627 if (field->is_repeated()) { \ 628 reflection->Add##CPPTYPE(message, field, VALUE); \ 629 } else { \ 630 reflection->Set##CPPTYPE(message, field, VALUE); \ 631 } \ 632 633 switch(field->cpp_type()) { 634 case FieldDescriptor::CPPTYPE_INT32: { 635 int64 value; 636 DO(ConsumeSignedInteger(&value, kint32max)); 637 SET_FIELD(Int32, static_cast<int32>(value)); 638 break; 639 } 640 641 case FieldDescriptor::CPPTYPE_UINT32: { 642 uint64 value; 643 DO(ConsumeUnsignedInteger(&value, kuint32max)); 644 SET_FIELD(UInt32, static_cast<uint32>(value)); 645 break; 646 } 647 648 case FieldDescriptor::CPPTYPE_INT64: { 649 int64 value; 650 DO(ConsumeSignedInteger(&value, kint64max)); 651 SET_FIELD(Int64, value); 652 break; 653 } 654 655 case FieldDescriptor::CPPTYPE_UINT64: { 656 uint64 value; 657 DO(ConsumeUnsignedInteger(&value, kuint64max)); 658 SET_FIELD(UInt64, value); 659 break; 660 } 661 662 case FieldDescriptor::CPPTYPE_FLOAT: { 663 double value; 664 DO(ConsumeDouble(&value)); 665 SET_FIELD(Float, io::SafeDoubleToFloat(value)); 666 break; 667 } 668 669 case FieldDescriptor::CPPTYPE_DOUBLE: { 670 double value; 671 DO(ConsumeDouble(&value)); 672 SET_FIELD(Double, value); 673 break; 674 } 675 676 case FieldDescriptor::CPPTYPE_STRING: { 677 string value; 678 DO(ConsumeString(&value)); 679 SET_FIELD(String, value); 680 break; 681 } 682 683 case FieldDescriptor::CPPTYPE_BOOL: { 684 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 685 uint64 value; 686 DO(ConsumeUnsignedInteger(&value, 1)); 687 SET_FIELD(Bool, value); 688 } else { 689 string value; 690 DO(ConsumeIdentifier(&value)); 691 if (value == "true" || value == "True" || value == "t") { 692 SET_FIELD(Bool, true); 693 } else if (value == "false" || value == "False" || value == "f") { 694 SET_FIELD(Bool, false); 695 } else { 696 ReportError("Invalid value for boolean field \"" + field->name() 697 + "\". Value: \"" + value + "\"."); 698 return false; 699 } 700 } 701 break; 702 } 703 704 case FieldDescriptor::CPPTYPE_ENUM: { 705 string value; 706 const EnumDescriptor* enum_type = field->enum_type(); 707 const EnumValueDescriptor* enum_value = NULL; 708 709 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 710 DO(ConsumeIdentifier(&value)); 711 // Find the enumeration value. 712 enum_value = enum_type->FindValueByName(value); 713 714 } else if (LookingAt("-") || 715 LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 716 int64 int_value; 717 DO(ConsumeSignedInteger(&int_value, kint32max)); 718 value = SimpleItoa(int_value); // for error reporting 719 enum_value = enum_type->FindValueByNumber(int_value); 720 } else { 721 ReportError("Expected integer or identifier."); 722 return false; 723 } 724 725 if (enum_value == NULL) { 726 if (!allow_unknown_enum_) { 727 ReportError("Unknown enumeration value of \"" + value + "\" for " 728 "field \"" + field->name() + "\"."); 729 return false; 730 } else { 731 ReportWarning("Unknown enumeration value of \"" + value + "\" for " 732 "field \"" + field->name() + "\"."); 733 return true; 734 } 735 } 736 737 SET_FIELD(Enum, enum_value); 738 break; 739 } 740 741 case FieldDescriptor::CPPTYPE_MESSAGE: { 742 // We should never get here. Put here instead of a default 743 // so that if new types are added, we get a nice compiler warning. 744 GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE"; 745 break; 746 } 747 } 748 #undef SET_FIELD 749 return true; 750 } 751 752 bool SkipFieldValue() { 753 if (LookingAtType(io::Tokenizer::TYPE_STRING)) { 754 while (LookingAtType(io::Tokenizer::TYPE_STRING)) { 755 tokenizer_.Next(); 756 } 757 return true; 758 } 759 // Possible field values other than string: 760 // 12345 => TYPE_INTEGER 761 // -12345 => TYPE_SYMBOL + TYPE_INTEGER 762 // 1.2345 => TYPE_FLOAT 763 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT 764 // inf => TYPE_IDENTIFIER 765 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER 766 // TYPE_INTEGER => TYPE_IDENTIFIER 767 // Divides them into two group, one with TYPE_SYMBOL 768 // and the other without: 769 // Group one: 770 // 12345 => TYPE_INTEGER 771 // 1.2345 => TYPE_FLOAT 772 // inf => TYPE_IDENTIFIER 773 // TYPE_INTEGER => TYPE_IDENTIFIER 774 // Group two: 775 // -12345 => TYPE_SYMBOL + TYPE_INTEGER 776 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT 777 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER 778 // As we can see, the field value consists of an optional '-' and one of 779 // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER. 780 bool has_minus = TryConsume("-"); 781 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) && 782 !LookingAtType(io::Tokenizer::TYPE_FLOAT) && 783 !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 784 return false; 785 } 786 // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field 787 // value while other combinations all generate valid values. 788 // We check if the value of this combination is valid here. 789 // TYPE_IDENTIFIER after a '-' should be one of the float values listed 790 // below: 791 // inf, inff, infinity, nan 792 if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 793 string text = tokenizer_.current().text; 794 LowerString(&text); 795 if (text != "inf" && 796 text != "infinity" && 797 text != "nan") { 798 ReportError("Invalid float number: " + text); 799 return false; 800 } 801 } 802 tokenizer_.Next(); 803 return true; 804 } 805 806 // Returns true if the current token's text is equal to that specified. 807 bool LookingAt(const string& text) { 808 return tokenizer_.current().text == text; 809 } 810 811 // Returns true if the current token's type is equal to that specified. 812 bool LookingAtType(io::Tokenizer::TokenType token_type) { 813 return tokenizer_.current().type == token_type; 814 } 815 816 // Consumes an identifier and saves its value in the identifier parameter. 817 // Returns false if the token is not of type IDENTFIER. 818 bool ConsumeIdentifier(string* identifier) { 819 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 820 *identifier = tokenizer_.current().text; 821 tokenizer_.Next(); 822 return true; 823 } 824 825 // If allow_field_numer_ or allow_unknown_field_ is true, we should able 826 // to parse integer identifiers. 827 if ((allow_field_number_ || allow_unknown_field_) 828 && LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 829 *identifier = tokenizer_.current().text; 830 tokenizer_.Next(); 831 return true; 832 } 833 834 ReportError("Expected identifier."); 835 return false; 836 } 837 838 // Consume a string of form "<id1>.<id2>....<idN>". 839 bool ConsumeFullTypeName(string* name) { 840 DO(ConsumeIdentifier(name)); 841 while (TryConsume(".")) { 842 string part; 843 DO(ConsumeIdentifier(&part)); 844 *name += "."; 845 *name += part; 846 } 847 return true; 848 } 849 850 // Consumes a string and saves its value in the text parameter. 851 // Returns false if the token is not of type STRING. 852 bool ConsumeString(string* text) { 853 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) { 854 ReportError("Expected string."); 855 return false; 856 } 857 858 text->clear(); 859 while (LookingAtType(io::Tokenizer::TYPE_STRING)) { 860 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text); 861 862 tokenizer_.Next(); 863 } 864 865 return true; 866 } 867 868 // Consumes a uint64 and saves its value in the value parameter. 869 // Returns false if the token is not of type INTEGER. 870 bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) { 871 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 872 ReportError("Expected integer."); 873 return false; 874 } 875 876 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, 877 max_value, value)) { 878 ReportError("Integer out of range."); 879 return false; 880 } 881 882 tokenizer_.Next(); 883 return true; 884 } 885 886 // Consumes an int64 and saves its value in the value parameter. 887 // Note that since the tokenizer does not support negative numbers, 888 // we actually may consume an additional token (for the minus sign) in this 889 // method. Returns false if the token is not an integer 890 // (signed or otherwise). 891 bool ConsumeSignedInteger(int64* value, uint64 max_value) { 892 bool negative = false; 893 894 if (TryConsume("-")) { 895 negative = true; 896 // Two's complement always allows one more negative integer than 897 // positive. 898 ++max_value; 899 } 900 901 uint64 unsigned_value; 902 903 DO(ConsumeUnsignedInteger(&unsigned_value, max_value)); 904 905 *value = static_cast<int64>(unsigned_value); 906 907 if (negative) { 908 *value = -*value; 909 } 910 911 return true; 912 } 913 914 // Consumes a uint64 and saves its value in the value parameter. 915 // Accepts decimal numbers only, rejects hex or oct numbers. 916 bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) { 917 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 918 ReportError("Expected integer."); 919 return false; 920 } 921 922 const string& text = tokenizer_.current().text; 923 if (IsHexNumber(text) || IsOctNumber(text)) { 924 ReportError("Expect a decimal number."); 925 return false; 926 } 927 928 if (!io::Tokenizer::ParseInteger(text, max_value, value)) { 929 ReportError("Integer out of range."); 930 return false; 931 } 932 933 tokenizer_.Next(); 934 return true; 935 } 936 937 // Consumes a double and saves its value in the value parameter. 938 // Note that since the tokenizer does not support negative numbers, 939 // we actually may consume an additional token (for the minus sign) in this 940 // method. Returns false if the token is not a double 941 // (signed or otherwise). 942 bool ConsumeDouble(double* value) { 943 bool negative = false; 944 945 if (TryConsume("-")) { 946 negative = true; 947 } 948 949 // A double can actually be an integer, according to the tokenizer. 950 // Therefore, we must check both cases here. 951 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 952 // We have found an integer value for the double. 953 uint64 integer_value; 954 DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max)); 955 956 *value = static_cast<double>(integer_value); 957 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) { 958 // We have found a float value for the double. 959 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text); 960 961 // Mark the current token as consumed. 962 tokenizer_.Next(); 963 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 964 string text = tokenizer_.current().text; 965 LowerString(&text); 966 if (text == "inf" || 967 text == "infinity") { 968 *value = std::numeric_limits<double>::infinity(); 969 tokenizer_.Next(); 970 } else if (text == "nan") { 971 *value = std::numeric_limits<double>::quiet_NaN(); 972 tokenizer_.Next(); 973 } else { 974 ReportError("Expected double."); 975 return false; 976 } 977 } else { 978 ReportError("Expected double."); 979 return false; 980 } 981 982 if (negative) { 983 *value = -*value; 984 } 985 986 return true; 987 } 988 989 // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name" 990 // or "type.googleprod.com/full.type.Name" 991 bool ConsumeAnyTypeUrl(string* full_type_name, string* prefix) { 992 // TODO(saito) Extend Consume() to consume multiple tokens at once, so that 993 // this code can be written as just DO(Consume(kGoogleApisTypePrefix)). 994 string url1, url2, url3; 995 DO(ConsumeIdentifier(&url1)); // type 996 DO(Consume(".")); 997 DO(ConsumeIdentifier(&url2)); // googleapis 998 DO(Consume(".")); 999 DO(ConsumeIdentifier(&url3)); // com 1000 DO(Consume("/")); 1001 DO(ConsumeFullTypeName(full_type_name)); 1002 1003 *prefix = url1 + "." + url2 + "." + url3 + "/"; 1004 if (*prefix != internal::kTypeGoogleApisComPrefix && 1005 *prefix != internal::kTypeGoogleProdComPrefix) { 1006 ReportError("TextFormat::Parser for Any supports only " 1007 "type.googleapis.com and type.googleprod.com, " 1008 "but found \"" + *prefix + "\""); 1009 return false; 1010 } 1011 return true; 1012 } 1013 1014 // A helper function for reconstructing Any::value. Consumes a text of 1015 // full_type_name, then serializes it into serialized_value. "pool" is used to 1016 // look up and create a temporary object with full_type_name. 1017 bool ConsumeAnyValue(const string& full_type_name, const DescriptorPool* pool, 1018 string* serialized_value) { 1019 const Descriptor* value_descriptor = 1020 pool->FindMessageTypeByName(full_type_name); 1021 if (value_descriptor == NULL) { 1022 ReportError("Could not find type \"" + full_type_name + 1023 "\" stored in google.protobuf.Any."); 1024 return false; 1025 } 1026 DynamicMessageFactory factory; 1027 const Message* value_prototype = factory.GetPrototype(value_descriptor); 1028 if (value_prototype == NULL) { 1029 return false; 1030 } 1031 google::protobuf::scoped_ptr<Message> value(value_prototype->New()); 1032 string sub_delimiter; 1033 DO(ConsumeMessageDelimiter(&sub_delimiter)); 1034 DO(ConsumeMessage(value.get(), sub_delimiter)); 1035 1036 value->AppendToString(serialized_value); 1037 return true; 1038 } 1039 1040 // Consumes a token and confirms that it matches that specified in the 1041 // value parameter. Returns false if the token found does not match that 1042 // which was specified. 1043 bool Consume(const string& value) { 1044 const string& current_value = tokenizer_.current().text; 1045 1046 if (current_value != value) { 1047 ReportError("Expected \"" + value + "\", found \"" + current_value 1048 + "\"."); 1049 return false; 1050 } 1051 1052 tokenizer_.Next(); 1053 1054 return true; 1055 } 1056 1057 // Attempts to consume the supplied value. Returns false if a the 1058 // token found does not match the value specified. 1059 bool TryConsume(const string& value) { 1060 if (tokenizer_.current().text == value) { 1061 tokenizer_.Next(); 1062 return true; 1063 } else { 1064 return false; 1065 } 1066 } 1067 1068 // An internal instance of the Tokenizer's error collector, used to 1069 // collect any base-level parse errors and feed them to the ParserImpl. 1070 class ParserErrorCollector : public io::ErrorCollector { 1071 public: 1072 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) : 1073 parser_(parser) { } 1074 1075 virtual ~ParserErrorCollector() { } 1076 1077 virtual void AddError(int line, int column, const string& message) { 1078 parser_->ReportError(line, column, message); 1079 } 1080 1081 virtual void AddWarning(int line, int column, const string& message) { 1082 parser_->ReportWarning(line, column, message); 1083 } 1084 1085 private: 1086 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector); 1087 TextFormat::Parser::ParserImpl* parser_; 1088 }; 1089 1090 io::ErrorCollector* error_collector_; 1091 TextFormat::Finder* finder_; 1092 ParseInfoTree* parse_info_tree_; 1093 ParserErrorCollector tokenizer_error_collector_; 1094 io::Tokenizer tokenizer_; 1095 const Descriptor* root_message_type_; 1096 SingularOverwritePolicy singular_overwrite_policy_; 1097 const bool allow_case_insensitive_field_; 1098 const bool allow_unknown_field_; 1099 const bool allow_unknown_enum_; 1100 const bool allow_field_number_; 1101 bool had_errors_; 1102 }; 1103 1104 #undef DO 1105 1106 // =========================================================================== 1107 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted 1108 // from the Printer found in //google/protobuf/io/printer.h 1109 class TextFormat::Printer::TextGenerator { 1110 public: 1111 explicit TextGenerator(io::ZeroCopyOutputStream* output, 1112 int initial_indent_level) 1113 : output_(output), 1114 buffer_(NULL), 1115 buffer_size_(0), 1116 at_start_of_line_(true), 1117 failed_(false), 1118 indent_(""), 1119 initial_indent_level_(initial_indent_level) { 1120 indent_.resize(initial_indent_level_ * 2, ' '); 1121 } 1122 1123 ~TextGenerator() { 1124 // Only BackUp() if we're sure we've successfully called Next() at least 1125 // once. 1126 if (!failed_ && buffer_size_ > 0) { 1127 output_->BackUp(buffer_size_); 1128 } 1129 } 1130 1131 // Indent text by two spaces. After calling Indent(), two spaces will be 1132 // inserted at the beginning of each line of text. Indent() may be called 1133 // multiple times to produce deeper indents. 1134 void Indent() { 1135 indent_ += " "; 1136 } 1137 1138 // Reduces the current indent level by two spaces, or crashes if the indent 1139 // level is zero. 1140 void Outdent() { 1141 if (indent_.empty() || 1142 indent_.size() < initial_indent_level_ * 2) { 1143 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent()."; 1144 return; 1145 } 1146 1147 indent_.resize(indent_.size() - 2); 1148 } 1149 1150 // Print text to the output stream. 1151 void Print(const string& str) { 1152 Print(str.data(), str.size()); 1153 } 1154 1155 // Print text to the output stream. 1156 void Print(const char* text) { 1157 Print(text, strlen(text)); 1158 } 1159 1160 // Print text to the output stream. 1161 void Print(const char* text, size_t size) { 1162 size_t pos = 0; // The number of bytes we've written so far. 1163 1164 for (size_t i = 0; i < size; i++) { 1165 if (text[i] == '\n') { 1166 // Saw newline. If there is more text, we may need to insert an indent 1167 // here. So, write what we have so far, including the '\n'. 1168 Write(text + pos, i - pos + 1); 1169 pos = i + 1; 1170 1171 // Setting this true will cause the next Write() to insert an indent 1172 // first. 1173 at_start_of_line_ = true; 1174 } 1175 } 1176 1177 // Write the rest. 1178 Write(text + pos, size - pos); 1179 } 1180 1181 // True if any write to the underlying stream failed. (We don't just 1182 // crash in this case because this is an I/O failure, not a programming 1183 // error.) 1184 bool failed() const { return failed_; } 1185 1186 private: 1187 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator); 1188 1189 void Write(const char* data, size_t size) { 1190 if (failed_) return; 1191 if (size == 0) return; 1192 1193 if (at_start_of_line_) { 1194 // Insert an indent. 1195 at_start_of_line_ = false; 1196 Write(indent_.data(), indent_.size()); 1197 if (failed_) return; 1198 } 1199 1200 while (size > buffer_size_) { 1201 // Data exceeds space in the buffer. Copy what we can and request a 1202 // new buffer. 1203 memcpy(buffer_, data, buffer_size_); 1204 data += buffer_size_; 1205 size -= buffer_size_; 1206 void* void_buffer; 1207 failed_ = !output_->Next(&void_buffer, &buffer_size_); 1208 if (failed_) return; 1209 buffer_ = reinterpret_cast<char*>(void_buffer); 1210 } 1211 1212 // Buffer is big enough to receive the data; copy it. 1213 memcpy(buffer_, data, size); 1214 buffer_ += size; 1215 buffer_size_ -= size; 1216 } 1217 1218 io::ZeroCopyOutputStream* const output_; 1219 char* buffer_; 1220 int buffer_size_; 1221 bool at_start_of_line_; 1222 bool failed_; 1223 1224 string indent_; 1225 int initial_indent_level_; 1226 }; 1227 1228 // =========================================================================== 1229 1230 TextFormat::Finder::~Finder() { 1231 } 1232 1233 TextFormat::Parser::Parser() 1234 : error_collector_(NULL), 1235 finder_(NULL), 1236 parse_info_tree_(NULL), 1237 allow_partial_(false), 1238 allow_case_insensitive_field_(false), 1239 allow_unknown_field_(false), 1240 allow_unknown_enum_(false), 1241 allow_field_number_(false), 1242 allow_relaxed_whitespace_(false), 1243 allow_singular_overwrites_(false) { 1244 } 1245 1246 TextFormat::Parser::~Parser() {} 1247 1248 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input, 1249 Message* output) { 1250 output->Clear(); 1251 1252 ParserImpl::SingularOverwritePolicy overwrites_policy = 1253 allow_singular_overwrites_ 1254 ? ParserImpl::ALLOW_SINGULAR_OVERWRITES 1255 : ParserImpl::FORBID_SINGULAR_OVERWRITES; 1256 1257 ParserImpl parser(output->GetDescriptor(), input, error_collector_, 1258 finder_, parse_info_tree_, 1259 overwrites_policy, 1260 allow_case_insensitive_field_, allow_unknown_field_, 1261 allow_unknown_enum_, allow_field_number_, 1262 allow_relaxed_whitespace_); 1263 return MergeUsingImpl(input, output, &parser); 1264 } 1265 1266 bool TextFormat::Parser::ParseFromString(const string& input, 1267 Message* output) { 1268 io::ArrayInputStream input_stream(input.data(), input.size()); 1269 return Parse(&input_stream, output); 1270 } 1271 1272 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input, 1273 Message* output) { 1274 ParserImpl parser(output->GetDescriptor(), input, error_collector_, 1275 finder_, parse_info_tree_, 1276 ParserImpl::ALLOW_SINGULAR_OVERWRITES, 1277 allow_case_insensitive_field_, allow_unknown_field_, 1278 allow_unknown_enum_, allow_field_number_, 1279 allow_relaxed_whitespace_); 1280 return MergeUsingImpl(input, output, &parser); 1281 } 1282 1283 bool TextFormat::Parser::MergeFromString(const string& input, 1284 Message* output) { 1285 io::ArrayInputStream input_stream(input.data(), input.size()); 1286 return Merge(&input_stream, output); 1287 } 1288 1289 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */, 1290 Message* output, 1291 ParserImpl* parser_impl) { 1292 if (!parser_impl->Parse(output)) return false; 1293 if (!allow_partial_ && !output->IsInitialized()) { 1294 vector<string> missing_fields; 1295 output->FindInitializationErrors(&missing_fields); 1296 parser_impl->ReportError(-1, 0, "Message missing required fields: " + 1297 Join(missing_fields, ", ")); 1298 return false; 1299 } 1300 return true; 1301 } 1302 1303 bool TextFormat::Parser::ParseFieldValueFromString( 1304 const string& input, 1305 const FieldDescriptor* field, 1306 Message* output) { 1307 io::ArrayInputStream input_stream(input.data(), input.size()); 1308 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_, 1309 finder_, parse_info_tree_, 1310 ParserImpl::ALLOW_SINGULAR_OVERWRITES, 1311 allow_case_insensitive_field_, allow_unknown_field_, 1312 allow_unknown_enum_, allow_field_number_, 1313 allow_relaxed_whitespace_); 1314 return parser.ParseField(field, output); 1315 } 1316 1317 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input, 1318 Message* output) { 1319 return Parser().Parse(input, output); 1320 } 1321 1322 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input, 1323 Message* output) { 1324 return Parser().Merge(input, output); 1325 } 1326 1327 /* static */ bool TextFormat::ParseFromString(const string& input, 1328 Message* output) { 1329 return Parser().ParseFromString(input, output); 1330 } 1331 1332 /* static */ bool TextFormat::MergeFromString(const string& input, 1333 Message* output) { 1334 return Parser().MergeFromString(input, output); 1335 } 1336 1337 // =========================================================================== 1338 1339 // The default implementation for FieldValuePrinter. The base class just 1340 // does simple formatting. That way, deriving classes could decide to fallback 1341 // to that behavior. 1342 TextFormat::FieldValuePrinter::FieldValuePrinter() {} 1343 TextFormat::FieldValuePrinter::~FieldValuePrinter() {} 1344 string TextFormat::FieldValuePrinter::PrintBool(bool val) const { 1345 return val ? "true" : "false"; 1346 } 1347 string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const { 1348 return SimpleItoa(val); 1349 } 1350 string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const { 1351 return SimpleItoa(val); 1352 } 1353 string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const { 1354 return SimpleItoa(val); 1355 } 1356 string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const { 1357 return SimpleItoa(val); 1358 } 1359 string TextFormat::FieldValuePrinter::PrintFloat(float val) const { 1360 return SimpleFtoa(val); 1361 } 1362 string TextFormat::FieldValuePrinter::PrintDouble(double val) const { 1363 return SimpleDtoa(val); 1364 } 1365 string TextFormat::FieldValuePrinter::PrintString(const string& val) const { 1366 string printed("\""); 1367 CEscapeAndAppend(val, &printed); 1368 printed.push_back('\"'); 1369 return printed; 1370 } 1371 string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const { 1372 return PrintString(val); 1373 } 1374 string TextFormat::FieldValuePrinter::PrintEnum(int32 val, 1375 const string& name) const { 1376 return name; 1377 } 1378 string TextFormat::FieldValuePrinter::PrintFieldName( 1379 const Message& message, 1380 const Reflection* reflection, 1381 const FieldDescriptor* field) const { 1382 if (field->is_extension()) { 1383 // We special-case MessageSet elements for compatibility with proto1. 1384 if (field->containing_type()->options().message_set_wire_format() 1385 && field->type() == FieldDescriptor::TYPE_MESSAGE 1386 && field->is_optional() 1387 && field->extension_scope() == field->message_type()) { 1388 return StrCat("[", field->message_type()->full_name(), "]"); 1389 } else { 1390 return StrCat("[", field->full_name(), "]"); 1391 } 1392 } else if (field->type() == FieldDescriptor::TYPE_GROUP) { 1393 // Groups must be serialized with their original capitalization. 1394 return field->message_type()->name(); 1395 } else { 1396 return field->name(); 1397 } 1398 } 1399 string TextFormat::FieldValuePrinter::PrintMessageStart( 1400 const Message& message, 1401 int field_index, 1402 int field_count, 1403 bool single_line_mode) const { 1404 return single_line_mode ? " { " : " {\n"; 1405 } 1406 string TextFormat::FieldValuePrinter::PrintMessageEnd( 1407 const Message& message, 1408 int field_index, 1409 int field_count, 1410 bool single_line_mode) const { 1411 return single_line_mode ? "} " : "}\n"; 1412 } 1413 1414 namespace { 1415 // Our own specialization: for UTF8 escaped strings. 1416 class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter { 1417 public: 1418 virtual string PrintString(const string& val) const { 1419 return StrCat("\"", strings::Utf8SafeCEscape(val), "\""); 1420 } 1421 virtual string PrintBytes(const string& val) const { 1422 return TextFormat::FieldValuePrinter::PrintString(val); 1423 } 1424 }; 1425 1426 } // namespace 1427 1428 TextFormat::Printer::Printer() 1429 : initial_indent_level_(0), 1430 single_line_mode_(false), 1431 use_field_number_(false), 1432 use_short_repeated_primitives_(false), 1433 hide_unknown_fields_(false), 1434 print_message_fields_in_index_order_(false), 1435 expand_any_(false), 1436 truncate_string_field_longer_than_(0LL) { 1437 SetUseUtf8StringEscaping(false); 1438 } 1439 1440 TextFormat::Printer::~Printer() { 1441 STLDeleteValues(&custom_printers_); 1442 } 1443 1444 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) { 1445 SetDefaultFieldValuePrinter(as_utf8 1446 ? new FieldValuePrinterUtf8Escaping() 1447 : new FieldValuePrinter()); 1448 } 1449 1450 void TextFormat::Printer::SetDefaultFieldValuePrinter( 1451 const FieldValuePrinter* printer) { 1452 default_field_value_printer_.reset(printer); 1453 } 1454 1455 bool TextFormat::Printer::RegisterFieldValuePrinter( 1456 const FieldDescriptor* field, 1457 const FieldValuePrinter* printer) { 1458 return field != NULL && printer != NULL && 1459 custom_printers_.insert(std::make_pair(field, printer)).second; 1460 } 1461 1462 bool TextFormat::Printer::PrintToString(const Message& message, 1463 string* output) const { 1464 GOOGLE_DCHECK(output) << "output specified is NULL"; 1465 1466 output->clear(); 1467 io::StringOutputStream output_stream(output); 1468 1469 return Print(message, &output_stream); 1470 } 1471 1472 bool TextFormat::Printer::PrintUnknownFieldsToString( 1473 const UnknownFieldSet& unknown_fields, 1474 string* output) const { 1475 GOOGLE_DCHECK(output) << "output specified is NULL"; 1476 1477 output->clear(); 1478 io::StringOutputStream output_stream(output); 1479 return PrintUnknownFields(unknown_fields, &output_stream); 1480 } 1481 1482 bool TextFormat::Printer::Print(const Message& message, 1483 io::ZeroCopyOutputStream* output) const { 1484 TextGenerator generator(output, initial_indent_level_); 1485 1486 Print(message, generator); 1487 1488 // Output false if the generator failed internally. 1489 return !generator.failed(); 1490 } 1491 1492 bool TextFormat::Printer::PrintUnknownFields( 1493 const UnknownFieldSet& unknown_fields, 1494 io::ZeroCopyOutputStream* output) const { 1495 TextGenerator generator(output, initial_indent_level_); 1496 1497 PrintUnknownFields(unknown_fields, generator); 1498 1499 // Output false if the generator failed internally. 1500 return !generator.failed(); 1501 } 1502 1503 namespace { 1504 // Comparison functor for sorting FieldDescriptors by field index. 1505 struct FieldIndexSorter { 1506 bool operator()(const FieldDescriptor* left, 1507 const FieldDescriptor* right) const { 1508 return left->index() < right->index(); 1509 } 1510 }; 1511 1512 } // namespace 1513 1514 bool TextFormat::Printer::PrintAny(const Message& message, 1515 TextGenerator& generator) const { 1516 const FieldDescriptor* type_url_field; 1517 const FieldDescriptor* value_field; 1518 if (!internal::GetAnyFieldDescriptors(message, &type_url_field, 1519 &value_field)) { 1520 return false; 1521 } 1522 1523 const Reflection* reflection = message.GetReflection(); 1524 1525 // Extract the full type name from the type_url field. 1526 const string& type_url = reflection->GetString(message, type_url_field); 1527 string full_type_name; 1528 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) { 1529 return false; 1530 } 1531 1532 // Print the "value" in text. 1533 const google::protobuf::Descriptor* value_descriptor = 1534 message.GetDescriptor()->file()->pool()->FindMessageTypeByName( 1535 full_type_name); 1536 if (value_descriptor == NULL) { 1537 GOOGLE_LOG(WARNING) << "Proto type " << type_url << " not found"; 1538 return false; 1539 } 1540 DynamicMessageFactory factory; 1541 google::protobuf::scoped_ptr<google::protobuf::Message> value_message( 1542 factory.GetPrototype(value_descriptor)->New()); 1543 string serialized_value = reflection->GetString(message, value_field); 1544 if (!value_message->ParseFromString(serialized_value)) { 1545 GOOGLE_LOG(WARNING) << type_url << ": failed to parse contents"; 1546 return false; 1547 } 1548 generator.Print(StrCat("[", type_url, "]")); 1549 const FieldValuePrinter* printer = FindWithDefault( 1550 custom_printers_, value_field, default_field_value_printer_.get()); 1551 generator.Print( 1552 printer->PrintMessageStart(message, -1, 0, single_line_mode_)); 1553 generator.Indent(); 1554 Print(*value_message, generator); 1555 generator.Outdent(); 1556 generator.Print(printer->PrintMessageEnd(message, -1, 0, single_line_mode_)); 1557 return true; 1558 } 1559 1560 void TextFormat::Printer::Print(const Message& message, 1561 TextGenerator& generator) const { 1562 const Descriptor* descriptor = message.GetDescriptor(); 1563 const Reflection* reflection = message.GetReflection(); 1564 if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ && 1565 PrintAny(message, generator)) { 1566 return; 1567 } 1568 vector<const FieldDescriptor*> fields; 1569 reflection->ListFields(message, &fields); 1570 if (print_message_fields_in_index_order_) { 1571 std::sort(fields.begin(), fields.end(), FieldIndexSorter()); 1572 } 1573 for (int i = 0; i < fields.size(); i++) { 1574 PrintField(message, reflection, fields[i], generator); 1575 } 1576 if (!hide_unknown_fields_) { 1577 PrintUnknownFields(reflection->GetUnknownFields(message), generator); 1578 } 1579 } 1580 1581 void TextFormat::Printer::PrintFieldValueToString( 1582 const Message& message, 1583 const FieldDescriptor* field, 1584 int index, 1585 string* output) const { 1586 1587 GOOGLE_DCHECK(output) << "output specified is NULL"; 1588 1589 output->clear(); 1590 io::StringOutputStream output_stream(output); 1591 TextGenerator generator(&output_stream, initial_indent_level_); 1592 1593 PrintFieldValue(message, message.GetReflection(), field, index, generator); 1594 } 1595 1596 class MapEntryMessageComparator { 1597 public: 1598 explicit MapEntryMessageComparator(const Descriptor* descriptor) 1599 : field_(descriptor->field(0)) {} 1600 1601 bool operator()(const Message* a, const Message* b) { 1602 const Reflection* reflection = a->GetReflection(); 1603 switch (field_->cpp_type()) { 1604 case FieldDescriptor::CPPTYPE_BOOL: { 1605 bool first = reflection->GetBool(*a, field_); 1606 bool second = reflection->GetBool(*b, field_); 1607 return first < second; 1608 } 1609 case FieldDescriptor::CPPTYPE_INT32: { 1610 int32 first = reflection->GetInt32(*a, field_); 1611 int32 second = reflection->GetInt32(*b, field_); 1612 return first < second; 1613 } 1614 case FieldDescriptor::CPPTYPE_INT64: { 1615 int64 first = reflection->GetInt64(*a, field_); 1616 int64 second = reflection->GetInt64(*b, field_); 1617 return first < second; 1618 } 1619 case FieldDescriptor::CPPTYPE_UINT32: { 1620 uint32 first = reflection->GetUInt32(*a, field_); 1621 uint32 second = reflection->GetUInt32(*b, field_); 1622 return first < second; 1623 } 1624 case FieldDescriptor::CPPTYPE_UINT64: { 1625 uint64 first = reflection->GetUInt64(*a, field_); 1626 uint64 second = reflection->GetUInt64(*b, field_); 1627 return first < second; 1628 } 1629 case FieldDescriptor::CPPTYPE_STRING: { 1630 string first = reflection->GetString(*a, field_); 1631 string second = reflection->GetString(*b, field_); 1632 return first < second; 1633 } 1634 default: 1635 GOOGLE_LOG(DFATAL) << "Invalid key for map field."; 1636 return true; 1637 } 1638 } 1639 1640 private: 1641 const FieldDescriptor* field_; 1642 }; 1643 1644 void TextFormat::Printer::PrintField(const Message& message, 1645 const Reflection* reflection, 1646 const FieldDescriptor* field, 1647 TextGenerator& generator) const { 1648 if (use_short_repeated_primitives_ && 1649 field->is_repeated() && 1650 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING && 1651 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) { 1652 PrintShortRepeatedField(message, reflection, field, generator); 1653 return; 1654 } 1655 1656 int count = 0; 1657 1658 if (field->is_repeated()) { 1659 count = reflection->FieldSize(message, field); 1660 } else if (reflection->HasField(message, field)) { 1661 count = 1; 1662 } 1663 1664 std::vector<const Message*> sorted_map_field; 1665 if (field->is_map()) { 1666 const RepeatedPtrField<Message>& map_field = 1667 reflection->GetRepeatedPtrField<Message>(message, field); 1668 for (RepeatedPtrField<Message>::const_pointer_iterator it = 1669 map_field.pointer_begin(); 1670 it != map_field.pointer_end(); ++it) { 1671 sorted_map_field.push_back(*it); 1672 } 1673 1674 MapEntryMessageComparator comparator(field->message_type()); 1675 std::stable_sort(sorted_map_field.begin(), sorted_map_field.end(), 1676 comparator); 1677 } 1678 1679 for (int j = 0; j < count; ++j) { 1680 const int field_index = field->is_repeated() ? j : -1; 1681 1682 PrintFieldName(message, reflection, field, generator); 1683 1684 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { 1685 const FieldValuePrinter* printer = FindWithDefault( 1686 custom_printers_, field, default_field_value_printer_.get()); 1687 const Message& sub_message = 1688 field->is_repeated() 1689 ? (field->is_map() 1690 ? *sorted_map_field[j] 1691 : reflection->GetRepeatedMessage(message, field, j)) 1692 : reflection->GetMessage(message, field); 1693 generator.Print( 1694 printer->PrintMessageStart( 1695 sub_message, field_index, count, single_line_mode_)); 1696 generator.Indent(); 1697 Print(sub_message, generator); 1698 generator.Outdent(); 1699 generator.Print( 1700 printer->PrintMessageEnd( 1701 sub_message, field_index, count, single_line_mode_)); 1702 } else { 1703 generator.Print(": "); 1704 // Write the field value. 1705 PrintFieldValue(message, reflection, field, field_index, generator); 1706 if (single_line_mode_) { 1707 generator.Print(" "); 1708 } else { 1709 generator.Print("\n"); 1710 } 1711 } 1712 } 1713 } 1714 1715 void TextFormat::Printer::PrintShortRepeatedField( 1716 const Message& message, 1717 const Reflection* reflection, 1718 const FieldDescriptor* field, 1719 TextGenerator& generator) const { 1720 // Print primitive repeated field in short form. 1721 PrintFieldName(message, reflection, field, generator); 1722 1723 int size = reflection->FieldSize(message, field); 1724 generator.Print(": ["); 1725 for (int i = 0; i < size; i++) { 1726 if (i > 0) generator.Print(", "); 1727 PrintFieldValue(message, reflection, field, i, generator); 1728 } 1729 if (single_line_mode_) { 1730 generator.Print("] "); 1731 } else { 1732 generator.Print("]\n"); 1733 } 1734 } 1735 1736 void TextFormat::Printer::PrintFieldName(const Message& message, 1737 const Reflection* reflection, 1738 const FieldDescriptor* field, 1739 TextGenerator& generator) const { 1740 // if use_field_number_ is true, prints field number instead 1741 // of field name. 1742 if (use_field_number_) { 1743 generator.Print(SimpleItoa(field->number())); 1744 return; 1745 } 1746 1747 const FieldValuePrinter* printer = FindWithDefault( 1748 custom_printers_, field, default_field_value_printer_.get()); 1749 generator.Print(printer->PrintFieldName(message, reflection, field)); 1750 } 1751 1752 void TextFormat::Printer::PrintFieldValue( 1753 const Message& message, 1754 const Reflection* reflection, 1755 const FieldDescriptor* field, 1756 int index, 1757 TextGenerator& generator) const { 1758 GOOGLE_DCHECK(field->is_repeated() || (index == -1)) 1759 << "Index must be -1 for non-repeated fields"; 1760 1761 const FieldValuePrinter* printer 1762 = FindWithDefault(custom_printers_, field, 1763 default_field_value_printer_.get()); 1764 1765 switch (field->cpp_type()) { 1766 #define OUTPUT_FIELD(CPPTYPE, METHOD) \ 1767 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 1768 generator.Print(printer->Print##METHOD(field->is_repeated() \ 1769 ? reflection->GetRepeated##METHOD(message, field, index) \ 1770 : reflection->Get##METHOD(message, field))); \ 1771 break 1772 1773 OUTPUT_FIELD( INT32, Int32); 1774 OUTPUT_FIELD( INT64, Int64); 1775 OUTPUT_FIELD(UINT32, UInt32); 1776 OUTPUT_FIELD(UINT64, UInt64); 1777 OUTPUT_FIELD( FLOAT, Float); 1778 OUTPUT_FIELD(DOUBLE, Double); 1779 OUTPUT_FIELD( BOOL, Bool); 1780 #undef OUTPUT_FIELD 1781 1782 case FieldDescriptor::CPPTYPE_STRING: { 1783 string scratch; 1784 const string& value = field->is_repeated() 1785 ? reflection->GetRepeatedStringReference( 1786 message, field, index, &scratch) 1787 : reflection->GetStringReference(message, field, &scratch); 1788 const string* value_to_print = &value; 1789 string truncated_value; 1790 if (truncate_string_field_longer_than_ > 0 && 1791 truncate_string_field_longer_than_ < value.size()) { 1792 truncated_value = value.substr(0, truncate_string_field_longer_than_) + 1793 "...<truncated>..."; 1794 value_to_print = &truncated_value; 1795 } 1796 if (field->type() == FieldDescriptor::TYPE_STRING) { 1797 generator.Print(printer->PrintString(*value_to_print)); 1798 } else { 1799 GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES); 1800 generator.Print(printer->PrintBytes(*value_to_print)); 1801 } 1802 break; 1803 } 1804 1805 case FieldDescriptor::CPPTYPE_ENUM: { 1806 int enum_value = field->is_repeated() 1807 ? reflection->GetRepeatedEnumValue(message, field, index) 1808 : reflection->GetEnumValue(message, field); 1809 const EnumValueDescriptor* enum_desc = 1810 field->enum_type()->FindValueByNumber(enum_value); 1811 if (enum_desc != NULL) { 1812 generator.Print(printer->PrintEnum(enum_value, enum_desc->name())); 1813 } else { 1814 // Ordinarily, enum_desc should not be null, because proto2 has the 1815 // invariant that set enum field values must be in-range, but with the 1816 // new integer-based API for enums (or the RepeatedField<int> loophole), 1817 // it is possible for the user to force an unknown integer value. So we 1818 // simply use the integer value itself as the enum value name in this 1819 // case. 1820 generator.Print(printer->PrintEnum(enum_value, 1821 StringPrintf("%d", enum_value))); 1822 } 1823 break; 1824 } 1825 1826 case FieldDescriptor::CPPTYPE_MESSAGE: 1827 Print(field->is_repeated() 1828 ? reflection->GetRepeatedMessage(message, field, index) 1829 : reflection->GetMessage(message, field), 1830 generator); 1831 break; 1832 } 1833 } 1834 1835 /* static */ bool TextFormat::Print(const Message& message, 1836 io::ZeroCopyOutputStream* output) { 1837 return Printer().Print(message, output); 1838 } 1839 1840 /* static */ bool TextFormat::PrintUnknownFields( 1841 const UnknownFieldSet& unknown_fields, 1842 io::ZeroCopyOutputStream* output) { 1843 return Printer().PrintUnknownFields(unknown_fields, output); 1844 } 1845 1846 /* static */ bool TextFormat::PrintToString( 1847 const Message& message, string* output) { 1848 return Printer().PrintToString(message, output); 1849 } 1850 1851 /* static */ bool TextFormat::PrintUnknownFieldsToString( 1852 const UnknownFieldSet& unknown_fields, string* output) { 1853 return Printer().PrintUnknownFieldsToString(unknown_fields, output); 1854 } 1855 1856 /* static */ void TextFormat::PrintFieldValueToString( 1857 const Message& message, 1858 const FieldDescriptor* field, 1859 int index, 1860 string* output) { 1861 return Printer().PrintFieldValueToString(message, field, index, output); 1862 } 1863 1864 /* static */ bool TextFormat::ParseFieldValueFromString( 1865 const string& input, 1866 const FieldDescriptor* field, 1867 Message* message) { 1868 return Parser().ParseFieldValueFromString(input, field, message); 1869 } 1870 1871 // Prints an integer as hex with a fixed number of digits dependent on the 1872 // integer type. 1873 template<typename IntType> 1874 static string PaddedHex(IntType value) { 1875 string result; 1876 result.reserve(sizeof(value) * 2); 1877 for (int i = sizeof(value) * 2 - 1; i >= 0; i--) { 1878 result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F)); 1879 } 1880 return result; 1881 } 1882 1883 void TextFormat::Printer::PrintUnknownFields( 1884 const UnknownFieldSet& unknown_fields, TextGenerator& generator) const { 1885 for (int i = 0; i < unknown_fields.field_count(); i++) { 1886 const UnknownField& field = unknown_fields.field(i); 1887 string field_number = SimpleItoa(field.number()); 1888 1889 switch (field.type()) { 1890 case UnknownField::TYPE_VARINT: 1891 generator.Print(field_number); 1892 generator.Print(": "); 1893 generator.Print(SimpleItoa(field.varint())); 1894 if (single_line_mode_) { 1895 generator.Print(" "); 1896 } else { 1897 generator.Print("\n"); 1898 } 1899 break; 1900 case UnknownField::TYPE_FIXED32: { 1901 generator.Print(field_number); 1902 generator.Print(": 0x"); 1903 generator.Print( 1904 StrCat(strings::Hex(field.fixed32(), strings::ZERO_PAD_8))); 1905 if (single_line_mode_) { 1906 generator.Print(" "); 1907 } else { 1908 generator.Print("\n"); 1909 } 1910 break; 1911 } 1912 case UnknownField::TYPE_FIXED64: { 1913 generator.Print(field_number); 1914 generator.Print(": 0x"); 1915 generator.Print( 1916 StrCat(strings::Hex(field.fixed64(), strings::ZERO_PAD_16))); 1917 if (single_line_mode_) { 1918 generator.Print(" "); 1919 } else { 1920 generator.Print("\n"); 1921 } 1922 break; 1923 } 1924 case UnknownField::TYPE_LENGTH_DELIMITED: { 1925 generator.Print(field_number); 1926 const string& value = field.length_delimited(); 1927 UnknownFieldSet embedded_unknown_fields; 1928 if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) { 1929 // This field is parseable as a Message. 1930 // So it is probably an embedded message. 1931 if (single_line_mode_) { 1932 generator.Print(" { "); 1933 } else { 1934 generator.Print(" {\n"); 1935 generator.Indent(); 1936 } 1937 PrintUnknownFields(embedded_unknown_fields, generator); 1938 if (single_line_mode_) { 1939 generator.Print("} "); 1940 } else { 1941 generator.Outdent(); 1942 generator.Print("}\n"); 1943 } 1944 } else { 1945 // This field is not parseable as a Message. 1946 // So it is probably just a plain string. 1947 string printed(": \""); 1948 CEscapeAndAppend(value, &printed); 1949 printed.append(single_line_mode_ ? "\" " : "\"\n"); 1950 generator.Print(printed); 1951 } 1952 break; 1953 } 1954 case UnknownField::TYPE_GROUP: 1955 generator.Print(field_number); 1956 if (single_line_mode_) { 1957 generator.Print(" { "); 1958 } else { 1959 generator.Print(" {\n"); 1960 generator.Indent(); 1961 } 1962 PrintUnknownFields(field.group(), generator); 1963 if (single_line_mode_) { 1964 generator.Print("} "); 1965 } else { 1966 generator.Outdent(); 1967 generator.Print("}\n"); 1968 } 1969 break; 1970 } 1971 } 1972 } 1973 1974 } // namespace protobuf 1975 } // namespace google 1976