1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: kenton (at) google.com (Kenton Varda) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // Recursive descent FTW. 36 37 #include <float.h> 38 #include <google/protobuf/stubs/hash.h> 39 #include <limits> 40 41 42 #include <google/protobuf/compiler/parser.h> 43 #include <google/protobuf/descriptor.h> 44 #include <google/protobuf/descriptor.pb.h> 45 #include <google/protobuf/wire_format.h> 46 #include <google/protobuf/io/tokenizer.h> 47 #include <google/protobuf/stubs/common.h> 48 #include <google/protobuf/stubs/strutil.h> 49 #include <google/protobuf/stubs/map-util.h> 50 51 namespace google { 52 namespace protobuf { 53 namespace compiler { 54 55 using internal::WireFormat; 56 57 namespace { 58 59 typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap; 60 61 TypeNameMap MakeTypeNameTable() { 62 TypeNameMap result; 63 64 result["double" ] = FieldDescriptorProto::TYPE_DOUBLE; 65 result["float" ] = FieldDescriptorProto::TYPE_FLOAT; 66 result["uint64" ] = FieldDescriptorProto::TYPE_UINT64; 67 result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64; 68 result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32; 69 result["bool" ] = FieldDescriptorProto::TYPE_BOOL; 70 result["string" ] = FieldDescriptorProto::TYPE_STRING; 71 result["group" ] = FieldDescriptorProto::TYPE_GROUP; 72 73 result["bytes" ] = FieldDescriptorProto::TYPE_BYTES; 74 result["uint32" ] = FieldDescriptorProto::TYPE_UINT32; 75 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32; 76 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64; 77 result["int32" ] = FieldDescriptorProto::TYPE_INT32; 78 result["int64" ] = FieldDescriptorProto::TYPE_INT64; 79 result["sint32" ] = FieldDescriptorProto::TYPE_SINT32; 80 result["sint64" ] = FieldDescriptorProto::TYPE_SINT64; 81 82 return result; 83 } 84 85 const TypeNameMap kTypeNames = MakeTypeNameTable(); 86 87 } // anonymous namespace 88 89 // Makes code slightly more readable. The meaning of "DO(foo)" is 90 // "Execute foo and fail if it fails.", where failure is indicated by 91 // returning false. 92 #define DO(STATEMENT) if (STATEMENT) {} else return false 93 94 // =================================================================== 95 96 Parser::Parser() 97 : input_(NULL), 98 error_collector_(NULL), 99 source_location_table_(NULL), 100 had_errors_(false), 101 require_syntax_identifier_(false), 102 stop_after_syntax_identifier_(false) { 103 } 104 105 Parser::~Parser() { 106 } 107 108 // =================================================================== 109 110 inline bool Parser::LookingAt(const char* text) { 111 return input_->current().text == text; 112 } 113 114 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) { 115 return input_->current().type == token_type; 116 } 117 118 inline bool Parser::AtEnd() { 119 return LookingAtType(io::Tokenizer::TYPE_END); 120 } 121 122 bool Parser::TryConsume(const char* text) { 123 if (LookingAt(text)) { 124 input_->Next(); 125 return true; 126 } else { 127 return false; 128 } 129 } 130 131 bool Parser::Consume(const char* text, const char* error) { 132 if (TryConsume(text)) { 133 return true; 134 } else { 135 AddError(error); 136 return false; 137 } 138 } 139 140 bool Parser::Consume(const char* text) { 141 if (TryConsume(text)) { 142 return true; 143 } else { 144 AddError("Expected \"" + string(text) + "\"."); 145 return false; 146 } 147 } 148 149 bool Parser::ConsumeIdentifier(string* output, const char* error) { 150 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 151 *output = input_->current().text; 152 input_->Next(); 153 return true; 154 } else { 155 AddError(error); 156 return false; 157 } 158 } 159 160 bool Parser::ConsumeInteger(int* output, const char* error) { 161 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 162 uint64 value = 0; 163 if (!io::Tokenizer::ParseInteger(input_->current().text, 164 kint32max, &value)) { 165 AddError("Integer out of range."); 166 // We still return true because we did, in fact, parse an integer. 167 } 168 *output = value; 169 input_->Next(); 170 return true; 171 } else { 172 AddError(error); 173 return false; 174 } 175 } 176 177 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output, 178 const char* error) { 179 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 180 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value, 181 output)) { 182 AddError("Integer out of range."); 183 // We still return true because we did, in fact, parse an integer. 184 *output = 0; 185 } 186 input_->Next(); 187 return true; 188 } else { 189 AddError(error); 190 return false; 191 } 192 } 193 194 bool Parser::ConsumeNumber(double* output, const char* error) { 195 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) { 196 *output = io::Tokenizer::ParseFloat(input_->current().text); 197 input_->Next(); 198 return true; 199 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) { 200 // Also accept integers. 201 uint64 value = 0; 202 if (!io::Tokenizer::ParseInteger(input_->current().text, 203 kuint64max, &value)) { 204 AddError("Integer out of range."); 205 // We still return true because we did, in fact, parse a number. 206 } 207 *output = value; 208 input_->Next(); 209 return true; 210 } else if (LookingAt("inf")) { 211 *output = numeric_limits<double>::infinity(); 212 input_->Next(); 213 return true; 214 } else if (LookingAt("nan")) { 215 *output = numeric_limits<double>::quiet_NaN(); 216 input_->Next(); 217 return true; 218 } else { 219 AddError(error); 220 return false; 221 } 222 } 223 224 bool Parser::ConsumeString(string* output, const char* error) { 225 if (LookingAtType(io::Tokenizer::TYPE_STRING)) { 226 io::Tokenizer::ParseString(input_->current().text, output); 227 input_->Next(); 228 // Allow C++ like concatenation of adjacent string tokens. 229 while (LookingAtType(io::Tokenizer::TYPE_STRING)) { 230 io::Tokenizer::ParseStringAppend(input_->current().text, output); 231 input_->Next(); 232 } 233 return true; 234 } else { 235 AddError(error); 236 return false; 237 } 238 } 239 240 // ------------------------------------------------------------------- 241 242 void Parser::AddError(int line, int column, const string& error) { 243 if (error_collector_ != NULL) { 244 error_collector_->AddError(line, column, error); 245 } 246 had_errors_ = true; 247 } 248 249 void Parser::AddError(const string& error) { 250 AddError(input_->current().line, input_->current().column, error); 251 } 252 253 void Parser::RecordLocation( 254 const Message* descriptor, 255 DescriptorPool::ErrorCollector::ErrorLocation location, 256 int line, int column) { 257 if (source_location_table_ != NULL) { 258 source_location_table_->Add(descriptor, location, line, column); 259 } 260 } 261 262 void Parser::RecordLocation( 263 const Message* descriptor, 264 DescriptorPool::ErrorCollector::ErrorLocation location) { 265 RecordLocation(descriptor, location, 266 input_->current().line, input_->current().column); 267 } 268 269 // ------------------------------------------------------------------- 270 271 void Parser::SkipStatement() { 272 while (true) { 273 if (AtEnd()) { 274 return; 275 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) { 276 if (TryConsume(";")) { 277 return; 278 } else if (TryConsume("{")) { 279 SkipRestOfBlock(); 280 return; 281 } else if (LookingAt("}")) { 282 return; 283 } 284 } 285 input_->Next(); 286 } 287 } 288 289 void Parser::SkipRestOfBlock() { 290 while (true) { 291 if (AtEnd()) { 292 return; 293 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) { 294 if (TryConsume("}")) { 295 return; 296 } else if (TryConsume("{")) { 297 SkipRestOfBlock(); 298 } 299 } 300 input_->Next(); 301 } 302 } 303 304 // =================================================================== 305 306 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) { 307 input_ = input; 308 had_errors_ = false; 309 syntax_identifier_.clear(); 310 311 if (LookingAtType(io::Tokenizer::TYPE_START)) { 312 // Advance to first token. 313 input_->Next(); 314 } 315 316 if (require_syntax_identifier_ || LookingAt("syntax")) { 317 if (!ParseSyntaxIdentifier()) { 318 // Don't attempt to parse the file if we didn't recognize the syntax 319 // identifier. 320 return false; 321 } 322 } else if (!stop_after_syntax_identifier_) { 323 syntax_identifier_ = "proto2"; 324 } 325 326 if (stop_after_syntax_identifier_) return !had_errors_; 327 328 // Repeatedly parse statements until we reach the end of the file. 329 while (!AtEnd()) { 330 if (!ParseTopLevelStatement(file)) { 331 // This statement failed to parse. Skip it, but keep looping to parse 332 // other statements. 333 SkipStatement(); 334 335 if (LookingAt("}")) { 336 AddError("Unmatched \"}\"."); 337 input_->Next(); 338 } 339 } 340 } 341 342 input_ = NULL; 343 return !had_errors_; 344 } 345 346 bool Parser::ParseSyntaxIdentifier() { 347 DO(Consume("syntax", "File must begin with 'syntax = \"proto2\";'.")); 348 DO(Consume("=")); 349 io::Tokenizer::Token syntax_token = input_->current(); 350 string syntax; 351 DO(ConsumeString(&syntax, "Expected syntax identifier.")); 352 DO(Consume(";")); 353 354 syntax_identifier_ = syntax; 355 356 if (syntax != "proto2" && !stop_after_syntax_identifier_) { 357 AddError(syntax_token.line, syntax_token.column, 358 "Unrecognized syntax identifier \"" + syntax + "\". This parser " 359 "only recognizes \"proto2\"."); 360 return false; 361 } 362 363 return true; 364 } 365 366 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file) { 367 if (TryConsume(";")) { 368 // empty statement; ignore 369 return true; 370 } else if (LookingAt("message")) { 371 return ParseMessageDefinition(file->add_message_type()); 372 } else if (LookingAt("enum")) { 373 return ParseEnumDefinition(file->add_enum_type()); 374 } else if (LookingAt("service")) { 375 return ParseServiceDefinition(file->add_service()); 376 } else if (LookingAt("extend")) { 377 return ParseExtend(file->mutable_extension(), 378 file->mutable_message_type()); 379 } else if (LookingAt("import")) { 380 return ParseImport(file->add_dependency()); 381 } else if (LookingAt("package")) { 382 return ParsePackage(file); 383 } else if (LookingAt("option")) { 384 return ParseOption(file->mutable_options()); 385 } else { 386 AddError("Expected top-level statement (e.g. \"message\")."); 387 return false; 388 } 389 } 390 391 // ------------------------------------------------------------------- 392 // Messages 393 394 bool Parser::ParseMessageDefinition(DescriptorProto* message) { 395 DO(Consume("message")); 396 RecordLocation(message, DescriptorPool::ErrorCollector::NAME); 397 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name.")); 398 DO(ParseMessageBlock(message)); 399 return true; 400 } 401 402 bool Parser::ParseMessageBlock(DescriptorProto* message) { 403 DO(Consume("{")); 404 405 while (!TryConsume("}")) { 406 if (AtEnd()) { 407 AddError("Reached end of input in message definition (missing '}')."); 408 return false; 409 } 410 411 if (!ParseMessageStatement(message)) { 412 // This statement failed to parse. Skip it, but keep looping to parse 413 // other statements. 414 SkipStatement(); 415 } 416 } 417 418 return true; 419 } 420 421 bool Parser::ParseMessageStatement(DescriptorProto* message) { 422 if (TryConsume(";")) { 423 // empty statement; ignore 424 return true; 425 } else if (LookingAt("message")) { 426 return ParseMessageDefinition(message->add_nested_type()); 427 } else if (LookingAt("enum")) { 428 return ParseEnumDefinition(message->add_enum_type()); 429 } else if (LookingAt("extensions")) { 430 return ParseExtensions(message); 431 } else if (LookingAt("extend")) { 432 return ParseExtend(message->mutable_extension(), 433 message->mutable_nested_type()); 434 } else if (LookingAt("option")) { 435 return ParseOption(message->mutable_options()); 436 } else { 437 return ParseMessageField(message->add_field(), 438 message->mutable_nested_type()); 439 } 440 } 441 442 bool Parser::ParseMessageField(FieldDescriptorProto* field, 443 RepeatedPtrField<DescriptorProto>* messages) { 444 // Parse label and type. 445 FieldDescriptorProto::Label label; 446 DO(ParseLabel(&label)); 447 field->set_label(label); 448 449 RecordLocation(field, DescriptorPool::ErrorCollector::TYPE); 450 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32; 451 string type_name; 452 DO(ParseType(&type, &type_name)); 453 if (type_name.empty()) { 454 field->set_type(type); 455 } else { 456 field->set_type_name(type_name); 457 } 458 459 // Parse name and '='. 460 RecordLocation(field, DescriptorPool::ErrorCollector::NAME); 461 io::Tokenizer::Token name_token = input_->current(); 462 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name.")); 463 DO(Consume("=", "Missing field number.")); 464 465 // Parse field number. 466 RecordLocation(field, DescriptorPool::ErrorCollector::NUMBER); 467 int number; 468 DO(ConsumeInteger(&number, "Expected field number.")); 469 field->set_number(number); 470 471 // Parse options. 472 DO(ParseFieldOptions(field)); 473 474 // Deal with groups. 475 if (type_name.empty() && type == FieldDescriptorProto::TYPE_GROUP) { 476 DescriptorProto* group = messages->Add(); 477 group->set_name(field->name()); 478 // Record name location to match the field name's location. 479 RecordLocation(group, DescriptorPool::ErrorCollector::NAME, 480 name_token.line, name_token.column); 481 482 // As a hack for backwards-compatibility, we force the group name to start 483 // with a capital letter and lower-case the field name. New code should 484 // not use groups; it should use nested messages. 485 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) { 486 AddError(name_token.line, name_token.column, 487 "Group names must start with a capital letter."); 488 } 489 LowerString(field->mutable_name()); 490 491 field->set_type_name(group->name()); 492 if (LookingAt("{")) { 493 DO(ParseMessageBlock(group)); 494 } else { 495 AddError("Missing group body."); 496 return false; 497 } 498 } else { 499 DO(Consume(";")); 500 } 501 502 return true; 503 } 504 505 bool Parser::ParseFieldOptions(FieldDescriptorProto* field) { 506 if (!TryConsume("[")) return true; 507 508 // Parse field options. 509 do { 510 if (LookingAt("default")) { 511 DO(ParseDefaultAssignment(field)); 512 } else { 513 DO(ParseOptionAssignment(field->mutable_options())); 514 } 515 } while (TryConsume(",")); 516 517 DO(Consume("]")); 518 return true; 519 } 520 521 bool Parser::ParseDefaultAssignment(FieldDescriptorProto* field) { 522 if (field->has_default_value()) { 523 AddError("Already set option \"default\"."); 524 field->clear_default_value(); 525 } 526 527 DO(Consume("default")); 528 DO(Consume("=")); 529 530 RecordLocation(field, DescriptorPool::ErrorCollector::DEFAULT_VALUE); 531 string* default_value = field->mutable_default_value(); 532 533 if (!field->has_type()) { 534 // The field has a type name, but we don't know if it is a message or an 535 // enum yet. Assume an enum for now. 536 DO(ConsumeIdentifier(default_value, "Expected identifier.")); 537 return true; 538 } 539 540 switch (field->type()) { 541 case FieldDescriptorProto::TYPE_INT32: 542 case FieldDescriptorProto::TYPE_INT64: 543 case FieldDescriptorProto::TYPE_SINT32: 544 case FieldDescriptorProto::TYPE_SINT64: 545 case FieldDescriptorProto::TYPE_SFIXED32: 546 case FieldDescriptorProto::TYPE_SFIXED64: { 547 uint64 max_value = kint64max; 548 if (field->type() == FieldDescriptorProto::TYPE_INT32 || 549 field->type() == FieldDescriptorProto::TYPE_SINT32 || 550 field->type() == FieldDescriptorProto::TYPE_SFIXED32) { 551 max_value = kint32max; 552 } 553 554 // These types can be negative. 555 if (TryConsume("-")) { 556 default_value->append("-"); 557 // Two's complement always has one more negative value than positive. 558 ++max_value; 559 } 560 // Parse the integer to verify that it is not out-of-range. 561 uint64 value; 562 DO(ConsumeInteger64(max_value, &value, "Expected integer.")); 563 // And stringify it again. 564 default_value->append(SimpleItoa(value)); 565 break; 566 } 567 568 case FieldDescriptorProto::TYPE_UINT32: 569 case FieldDescriptorProto::TYPE_UINT64: 570 case FieldDescriptorProto::TYPE_FIXED32: 571 case FieldDescriptorProto::TYPE_FIXED64: { 572 uint64 max_value = kuint64max; 573 if (field->type() == FieldDescriptorProto::TYPE_UINT32 || 574 field->type() == FieldDescriptorProto::TYPE_FIXED32) { 575 max_value = kuint32max; 576 } 577 578 // Numeric, not negative. 579 if (TryConsume("-")) { 580 AddError("Unsigned field can't have negative default value."); 581 } 582 // Parse the integer to verify that it is not out-of-range. 583 uint64 value; 584 DO(ConsumeInteger64(max_value, &value, "Expected integer.")); 585 // And stringify it again. 586 default_value->append(SimpleItoa(value)); 587 break; 588 } 589 590 case FieldDescriptorProto::TYPE_FLOAT: 591 case FieldDescriptorProto::TYPE_DOUBLE: 592 // These types can be negative. 593 if (TryConsume("-")) { 594 default_value->append("-"); 595 } 596 // Parse the integer because we have to convert hex integers to decimal 597 // floats. 598 double value; 599 DO(ConsumeNumber(&value, "Expected number.")); 600 // And stringify it again. 601 default_value->append(SimpleDtoa(value)); 602 break; 603 604 case FieldDescriptorProto::TYPE_BOOL: 605 if (TryConsume("true")) { 606 default_value->assign("true"); 607 } else if (TryConsume("false")) { 608 default_value->assign("false"); 609 } else { 610 AddError("Expected \"true\" or \"false\"."); 611 return false; 612 } 613 break; 614 615 case FieldDescriptorProto::TYPE_STRING: 616 DO(ConsumeString(default_value, "Expected string.")); 617 break; 618 619 case FieldDescriptorProto::TYPE_BYTES: 620 DO(ConsumeString(default_value, "Expected string.")); 621 *default_value = CEscape(*default_value); 622 break; 623 624 case FieldDescriptorProto::TYPE_ENUM: 625 DO(ConsumeIdentifier(default_value, "Expected identifier.")); 626 break; 627 628 case FieldDescriptorProto::TYPE_MESSAGE: 629 case FieldDescriptorProto::TYPE_GROUP: 630 AddError("Messages can't have default values."); 631 return false; 632 } 633 634 return true; 635 } 636 637 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option) { 638 UninterpretedOption::NamePart* name = uninterpreted_option->add_name(); 639 string identifier; // We parse identifiers into this string. 640 if (LookingAt("(")) { // This is an extension. 641 DO(Consume("(")); 642 // An extension name consists of dot-separated identifiers, and may begin 643 // with a dot. 644 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) { 645 DO(ConsumeIdentifier(&identifier, "Expected identifier.")); 646 name->mutable_name_part()->append(identifier); 647 } 648 while (LookingAt(".")) { 649 DO(Consume(".")); 650 name->mutable_name_part()->append("."); 651 DO(ConsumeIdentifier(&identifier, "Expected identifier.")); 652 name->mutable_name_part()->append(identifier); 653 } 654 DO(Consume(")")); 655 name->set_is_extension(true); 656 } else { // This is a regular field. 657 DO(ConsumeIdentifier(&identifier, "Expected identifier.")); 658 name->mutable_name_part()->append(identifier); 659 name->set_is_extension(false); 660 } 661 return true; 662 } 663 664 // We don't interpret the option here. Instead we store it in an 665 // UninterpretedOption, to be interpreted later. 666 bool Parser::ParseOptionAssignment(Message* options) { 667 // Create an entry in the uninterpreted_option field. 668 const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()-> 669 FindFieldByName("uninterpreted_option"); 670 GOOGLE_CHECK(uninterpreted_option_field != NULL) 671 << "No field named \"uninterpreted_option\" in the Options proto."; 672 673 UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>( 674 options->GetReflection()->AddMessage(options, 675 uninterpreted_option_field)); 676 677 // Parse dot-separated name. 678 RecordLocation(uninterpreted_option, 679 DescriptorPool::ErrorCollector::OPTION_NAME); 680 681 DO(ParseOptionNamePart(uninterpreted_option)); 682 683 while (LookingAt(".")) { 684 DO(Consume(".")); 685 DO(ParseOptionNamePart(uninterpreted_option)); 686 } 687 688 DO(Consume("=")); 689 690 RecordLocation(uninterpreted_option, 691 DescriptorPool::ErrorCollector::OPTION_VALUE); 692 693 // All values are a single token, except for negative numbers, which consist 694 // of a single '-' symbol, followed by a positive number. 695 bool is_negative = TryConsume("-"); 696 697 switch (input_->current().type) { 698 case io::Tokenizer::TYPE_START: 699 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read."; 700 return false; 701 702 case io::Tokenizer::TYPE_END: 703 AddError("Unexpected end of stream while parsing option value."); 704 return false; 705 706 case io::Tokenizer::TYPE_IDENTIFIER: { 707 if (is_negative) { 708 AddError("Invalid '-' symbol before identifier."); 709 return false; 710 } 711 string value; 712 DO(ConsumeIdentifier(&value, "Expected identifier.")); 713 uninterpreted_option->set_identifier_value(value); 714 break; 715 } 716 717 case io::Tokenizer::TYPE_INTEGER: { 718 uint64 value; 719 uint64 max_value = 720 is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max; 721 DO(ConsumeInteger64(max_value, &value, "Expected integer.")); 722 if (is_negative) { 723 uninterpreted_option->set_negative_int_value(-value); 724 } else { 725 uninterpreted_option->set_positive_int_value(value); 726 } 727 break; 728 } 729 730 case io::Tokenizer::TYPE_FLOAT: { 731 double value; 732 DO(ConsumeNumber(&value, "Expected number.")); 733 uninterpreted_option->set_double_value(is_negative ? -value : value); 734 break; 735 } 736 737 case io::Tokenizer::TYPE_STRING: { 738 if (is_negative) { 739 AddError("Invalid '-' symbol before string."); 740 return false; 741 } 742 string value; 743 DO(ConsumeString(&value, "Expected string.")); 744 uninterpreted_option->set_string_value(value); 745 break; 746 } 747 748 case io::Tokenizer::TYPE_SYMBOL: 749 AddError("Expected option value."); 750 return false; 751 } 752 753 return true; 754 } 755 756 bool Parser::ParseExtensions(DescriptorProto* message) { 757 // Parse the declaration. 758 DO(Consume("extensions")); 759 760 do { 761 DescriptorProto::ExtensionRange* range = message->add_extension_range(); 762 RecordLocation(range, DescriptorPool::ErrorCollector::NUMBER); 763 764 int start, end; 765 DO(ConsumeInteger(&start, "Expected field number range.")); 766 767 if (TryConsume("to")) { 768 if (TryConsume("max")) { 769 end = FieldDescriptor::kMaxNumber; 770 } else { 771 DO(ConsumeInteger(&end, "Expected integer.")); 772 } 773 } else { 774 end = start; 775 } 776 777 // Users like to specify inclusive ranges, but in code we like the end 778 // number to be exclusive. 779 ++end; 780 781 range->set_start(start); 782 range->set_end(end); 783 } while (TryConsume(",")); 784 785 DO(Consume(";")); 786 return true; 787 } 788 789 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions, 790 RepeatedPtrField<DescriptorProto>* messages) { 791 DO(Consume("extend")); 792 793 // We expect to see at least one extension field defined in the extend block. 794 // We need to create it now so we can record the extendee's location. 795 FieldDescriptorProto* first_field = extensions->Add(); 796 797 // Parse the extendee type. 798 RecordLocation(first_field, DescriptorPool::ErrorCollector::EXTENDEE); 799 DO(ParseUserDefinedType(first_field->mutable_extendee())); 800 801 // Parse the block. 802 DO(Consume("{")); 803 804 bool is_first = true; 805 806 do { 807 if (AtEnd()) { 808 AddError("Reached end of input in extend definition (missing '}')."); 809 return false; 810 } 811 812 FieldDescriptorProto* field; 813 if (is_first) { 814 field = first_field; 815 is_first = false; 816 } else { 817 field = extensions->Add(); 818 field->set_extendee(first_field->extendee()); 819 } 820 821 if (!ParseMessageField(field, messages)) { 822 // This statement failed to parse. Skip it, but keep looping to parse 823 // other statements. 824 SkipStatement(); 825 } 826 } while(!TryConsume("}")); 827 828 return true; 829 } 830 831 // ------------------------------------------------------------------- 832 // Enums 833 834 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type) { 835 DO(Consume("enum")); 836 RecordLocation(enum_type, DescriptorPool::ErrorCollector::NAME); 837 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name.")); 838 DO(ParseEnumBlock(enum_type)); 839 return true; 840 } 841 842 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type) { 843 DO(Consume("{")); 844 845 while (!TryConsume("}")) { 846 if (AtEnd()) { 847 AddError("Reached end of input in enum definition (missing '}')."); 848 return false; 849 } 850 851 if (!ParseEnumStatement(enum_type)) { 852 // This statement failed to parse. Skip it, but keep looping to parse 853 // other statements. 854 SkipStatement(); 855 } 856 } 857 858 return true; 859 } 860 861 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type) { 862 if (TryConsume(";")) { 863 // empty statement; ignore 864 return true; 865 } else if (LookingAt("option")) { 866 return ParseOption(enum_type->mutable_options()); 867 } else { 868 return ParseEnumConstant(enum_type->add_value()); 869 } 870 } 871 872 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value) { 873 RecordLocation(enum_value, DescriptorPool::ErrorCollector::NAME); 874 DO(ConsumeIdentifier(enum_value->mutable_name(), 875 "Expected enum constant name.")); 876 DO(Consume("=", "Missing numeric value for enum constant.")); 877 878 bool is_negative = TryConsume("-"); 879 int number; 880 DO(ConsumeInteger(&number, "Expected integer.")); 881 if (is_negative) number *= -1; 882 enum_value->set_number(number); 883 884 DO(ParseEnumConstantOptions(enum_value)); 885 886 DO(Consume(";")); 887 888 return true; 889 } 890 891 bool Parser::ParseEnumConstantOptions(EnumValueDescriptorProto* value) { 892 if (!TryConsume("[")) return true; 893 894 do { 895 DO(ParseOptionAssignment(value->mutable_options())); 896 } while (TryConsume(",")); 897 898 DO(Consume("]")); 899 return true; 900 } 901 902 // ------------------------------------------------------------------- 903 // Services 904 905 bool Parser::ParseServiceDefinition(ServiceDescriptorProto* service) { 906 DO(Consume("service")); 907 RecordLocation(service, DescriptorPool::ErrorCollector::NAME); 908 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name.")); 909 DO(ParseServiceBlock(service)); 910 return true; 911 } 912 913 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service) { 914 DO(Consume("{")); 915 916 while (!TryConsume("}")) { 917 if (AtEnd()) { 918 AddError("Reached end of input in service definition (missing '}')."); 919 return false; 920 } 921 922 if (!ParseServiceStatement(service)) { 923 // This statement failed to parse. Skip it, but keep looping to parse 924 // other statements. 925 SkipStatement(); 926 } 927 } 928 929 return true; 930 } 931 932 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service) { 933 if (TryConsume(";")) { 934 // empty statement; ignore 935 return true; 936 } else if (LookingAt("option")) { 937 return ParseOption(service->mutable_options()); 938 } else { 939 return ParseServiceMethod(service->add_method()); 940 } 941 } 942 943 bool Parser::ParseServiceMethod(MethodDescriptorProto* method) { 944 DO(Consume("rpc")); 945 RecordLocation(method, DescriptorPool::ErrorCollector::NAME); 946 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name.")); 947 948 // Parse input type. 949 DO(Consume("(")); 950 RecordLocation(method, DescriptorPool::ErrorCollector::INPUT_TYPE); 951 DO(ParseUserDefinedType(method->mutable_input_type())); 952 DO(Consume(")")); 953 954 // Parse output type. 955 DO(Consume("returns")); 956 DO(Consume("(")); 957 RecordLocation(method, DescriptorPool::ErrorCollector::OUTPUT_TYPE); 958 DO(ParseUserDefinedType(method->mutable_output_type())); 959 DO(Consume(")")); 960 961 if (TryConsume("{")) { 962 // Options! 963 while (!TryConsume("}")) { 964 if (AtEnd()) { 965 AddError("Reached end of input in method options (missing '}')."); 966 return false; 967 } 968 969 if (TryConsume(";")) { 970 // empty statement; ignore 971 } else { 972 if (!ParseOption(method->mutable_options())) { 973 // This statement failed to parse. Skip it, but keep looping to 974 // parse other statements. 975 SkipStatement(); 976 } 977 } 978 } 979 } else { 980 DO(Consume(";")); 981 } 982 983 return true; 984 } 985 986 // ------------------------------------------------------------------- 987 988 bool Parser::ParseLabel(FieldDescriptorProto::Label* label) { 989 if (TryConsume("optional")) { 990 *label = FieldDescriptorProto::LABEL_OPTIONAL; 991 return true; 992 } else if (TryConsume("repeated")) { 993 *label = FieldDescriptorProto::LABEL_REPEATED; 994 return true; 995 } else if (TryConsume("required")) { 996 *label = FieldDescriptorProto::LABEL_REQUIRED; 997 return true; 998 } else { 999 AddError("Expected \"required\", \"optional\", or \"repeated\"."); 1000 // We can actually reasonably recover here by just assuming the user 1001 // forgot the label altogether. 1002 *label = FieldDescriptorProto::LABEL_OPTIONAL; 1003 return true; 1004 } 1005 } 1006 1007 bool Parser::ParseType(FieldDescriptorProto::Type* type, 1008 string* type_name) { 1009 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text); 1010 if (iter != kTypeNames.end()) { 1011 *type = iter->second; 1012 input_->Next(); 1013 } else { 1014 DO(ParseUserDefinedType(type_name)); 1015 } 1016 return true; 1017 } 1018 1019 bool Parser::ParseUserDefinedType(string* type_name) { 1020 type_name->clear(); 1021 1022 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text); 1023 if (iter != kTypeNames.end()) { 1024 // Note: The only place enum types are allowed is for field types, but 1025 // if we are parsing a field type then we would not get here because 1026 // primitives are allowed there as well. So this error message doesn't 1027 // need to account for enums. 1028 AddError("Expected message type."); 1029 1030 // Pretend to accept this type so that we can go on parsing. 1031 *type_name = input_->current().text; 1032 input_->Next(); 1033 return true; 1034 } 1035 1036 // A leading "." means the name is fully-qualified. 1037 if (TryConsume(".")) type_name->append("."); 1038 1039 // Consume the first part of the name. 1040 string identifier; 1041 DO(ConsumeIdentifier(&identifier, "Expected type name.")); 1042 type_name->append(identifier); 1043 1044 // Consume more parts. 1045 while (TryConsume(".")) { 1046 type_name->append("."); 1047 DO(ConsumeIdentifier(&identifier, "Expected identifier.")); 1048 type_name->append(identifier); 1049 } 1050 1051 return true; 1052 } 1053 1054 // =================================================================== 1055 1056 bool Parser::ParsePackage(FileDescriptorProto* file) { 1057 if (file->has_package()) { 1058 AddError("Multiple package definitions."); 1059 // Don't append the new package to the old one. Just replace it. Not 1060 // that it really matters since this is an error anyway. 1061 file->clear_package(); 1062 } 1063 1064 DO(Consume("package")); 1065 1066 RecordLocation(file, DescriptorPool::ErrorCollector::NAME); 1067 1068 while (true) { 1069 string identifier; 1070 DO(ConsumeIdentifier(&identifier, "Expected identifier.")); 1071 file->mutable_package()->append(identifier); 1072 if (!TryConsume(".")) break; 1073 file->mutable_package()->append("."); 1074 } 1075 1076 DO(Consume(";")); 1077 return true; 1078 } 1079 1080 bool Parser::ParseImport(string* import_filename) { 1081 DO(Consume("import")); 1082 DO(ConsumeString(import_filename, 1083 "Expected a string naming the file to import.")); 1084 DO(Consume(";")); 1085 return true; 1086 } 1087 1088 bool Parser::ParseOption(Message* options) { 1089 DO(Consume("option")); 1090 DO(ParseOptionAssignment(options)); 1091 DO(Consume(";")); 1092 return true; 1093 } 1094 1095 // =================================================================== 1096 1097 SourceLocationTable::SourceLocationTable() {} 1098 SourceLocationTable::~SourceLocationTable() {} 1099 1100 bool SourceLocationTable::Find( 1101 const Message* descriptor, 1102 DescriptorPool::ErrorCollector::ErrorLocation location, 1103 int* line, int* column) const { 1104 const pair<int, int>* result = 1105 FindOrNull(location_map_, make_pair(descriptor, location)); 1106 if (result == NULL) { 1107 *line = -1; 1108 *column = 0; 1109 return false; 1110 } else { 1111 *line = result->first; 1112 *column = result->second; 1113 return true; 1114 } 1115 } 1116 1117 void SourceLocationTable::Add( 1118 const Message* descriptor, 1119 DescriptorPool::ErrorCollector::ErrorLocation location, 1120 int line, int column) { 1121 location_map_[make_pair(descriptor, location)] = make_pair(line, column); 1122 } 1123 1124 void SourceLocationTable::Clear() { 1125 location_map_.clear(); 1126 } 1127 1128 } // namespace compiler 1129 } // namespace protobuf 1130 } // namespace google 1131