Home | History | Annotate | Download | only in compiler
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // Recursive descent FTW.
     36 
     37 #include <float.h>
     38 #include <google/protobuf/stubs/hash.h>
     39 #include <limits>
     40 
     41 
     42 #include <google/protobuf/compiler/parser.h>
     43 #include <google/protobuf/descriptor.h>
     44 #include <google/protobuf/descriptor.pb.h>
     45 #include <google/protobuf/wire_format.h>
     46 #include <google/protobuf/io/tokenizer.h>
     47 #include <google/protobuf/stubs/common.h>
     48 #include <google/protobuf/stubs/strutil.h>
     49 #include <google/protobuf/stubs/map-util.h>
     50 
     51 namespace google {
     52 namespace protobuf {
     53 namespace compiler {
     54 
     55 using internal::WireFormat;
     56 
     57 namespace {
     58 
     59 typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap;
     60 
     61 TypeNameMap MakeTypeNameTable() {
     62   TypeNameMap result;
     63 
     64   result["double"  ] = FieldDescriptorProto::TYPE_DOUBLE;
     65   result["float"   ] = FieldDescriptorProto::TYPE_FLOAT;
     66   result["uint64"  ] = FieldDescriptorProto::TYPE_UINT64;
     67   result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64;
     68   result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32;
     69   result["bool"    ] = FieldDescriptorProto::TYPE_BOOL;
     70   result["string"  ] = FieldDescriptorProto::TYPE_STRING;
     71   result["group"   ] = FieldDescriptorProto::TYPE_GROUP;
     72 
     73   result["bytes"   ] = FieldDescriptorProto::TYPE_BYTES;
     74   result["uint32"  ] = FieldDescriptorProto::TYPE_UINT32;
     75   result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
     76   result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
     77   result["int32"   ] = FieldDescriptorProto::TYPE_INT32;
     78   result["int64"   ] = FieldDescriptorProto::TYPE_INT64;
     79   result["sint32"  ] = FieldDescriptorProto::TYPE_SINT32;
     80   result["sint64"  ] = FieldDescriptorProto::TYPE_SINT64;
     81 
     82   return result;
     83 }
     84 
     85 const TypeNameMap kTypeNames = MakeTypeNameTable();
     86 
     87 }  // anonymous namespace
     88 
     89 // Makes code slightly more readable.  The meaning of "DO(foo)" is
     90 // "Execute foo and fail if it fails.", where failure is indicated by
     91 // returning false.
     92 #define DO(STATEMENT) if (STATEMENT) {} else return false
     93 
     94 // ===================================================================
     95 
     96 Parser::Parser()
     97   : input_(NULL),
     98     error_collector_(NULL),
     99     source_location_table_(NULL),
    100     had_errors_(false),
    101     require_syntax_identifier_(false),
    102     stop_after_syntax_identifier_(false) {
    103 }
    104 
    105 Parser::~Parser() {
    106 }
    107 
    108 // ===================================================================
    109 
    110 inline bool Parser::LookingAt(const char* text) {
    111   return input_->current().text == text;
    112 }
    113 
    114 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
    115   return input_->current().type == token_type;
    116 }
    117 
    118 inline bool Parser::AtEnd() {
    119   return LookingAtType(io::Tokenizer::TYPE_END);
    120 }
    121 
    122 bool Parser::TryConsume(const char* text) {
    123   if (LookingAt(text)) {
    124     input_->Next();
    125     return true;
    126   } else {
    127     return false;
    128   }
    129 }
    130 
    131 bool Parser::Consume(const char* text, const char* error) {
    132   if (TryConsume(text)) {
    133     return true;
    134   } else {
    135     AddError(error);
    136     return false;
    137   }
    138 }
    139 
    140 bool Parser::Consume(const char* text) {
    141   if (TryConsume(text)) {
    142     return true;
    143   } else {
    144     AddError("Expected \"" + string(text) + "\".");
    145     return false;
    146   }
    147 }
    148 
    149 bool Parser::ConsumeIdentifier(string* output, const char* error) {
    150   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    151     *output = input_->current().text;
    152     input_->Next();
    153     return true;
    154   } else {
    155     AddError(error);
    156     return false;
    157   }
    158 }
    159 
    160 bool Parser::ConsumeInteger(int* output, const char* error) {
    161   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    162     uint64 value = 0;
    163     if (!io::Tokenizer::ParseInteger(input_->current().text,
    164                                      kint32max, &value)) {
    165       AddError("Integer out of range.");
    166       // We still return true because we did, in fact, parse an integer.
    167     }
    168     *output = value;
    169     input_->Next();
    170     return true;
    171   } else {
    172     AddError(error);
    173     return false;
    174   }
    175 }
    176 
    177 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
    178                               const char* error) {
    179   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    180     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
    181                                      output)) {
    182       AddError("Integer out of range.");
    183       // We still return true because we did, in fact, parse an integer.
    184       *output = 0;
    185     }
    186     input_->Next();
    187     return true;
    188   } else {
    189     AddError(error);
    190     return false;
    191   }
    192 }
    193 
    194 bool Parser::ConsumeNumber(double* output, const char* error) {
    195   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
    196     *output = io::Tokenizer::ParseFloat(input_->current().text);
    197     input_->Next();
    198     return true;
    199   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
    200     // Also accept integers.
    201     uint64 value = 0;
    202     if (!io::Tokenizer::ParseInteger(input_->current().text,
    203                                      kuint64max, &value)) {
    204       AddError("Integer out of range.");
    205       // We still return true because we did, in fact, parse a number.
    206     }
    207     *output = value;
    208     input_->Next();
    209     return true;
    210   } else if (LookingAt("inf")) {
    211     *output = numeric_limits<double>::infinity();
    212     input_->Next();
    213     return true;
    214   } else if (LookingAt("nan")) {
    215     *output = numeric_limits<double>::quiet_NaN();
    216     input_->Next();
    217     return true;
    218   } else {
    219     AddError(error);
    220     return false;
    221   }
    222 }
    223 
    224 bool Parser::ConsumeString(string* output, const char* error) {
    225   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    226     io::Tokenizer::ParseString(input_->current().text, output);
    227     input_->Next();
    228     // Allow C++ like concatenation of adjacent string tokens.
    229     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
    230       io::Tokenizer::ParseStringAppend(input_->current().text, output);
    231       input_->Next();
    232     }
    233     return true;
    234   } else {
    235     AddError(error);
    236     return false;
    237   }
    238 }
    239 
    240 // -------------------------------------------------------------------
    241 
    242 void Parser::AddError(int line, int column, const string& error) {
    243   if (error_collector_ != NULL) {
    244     error_collector_->AddError(line, column, error);
    245   }
    246   had_errors_ = true;
    247 }
    248 
    249 void Parser::AddError(const string& error) {
    250   AddError(input_->current().line, input_->current().column, error);
    251 }
    252 
    253 void Parser::RecordLocation(
    254     const Message* descriptor,
    255     DescriptorPool::ErrorCollector::ErrorLocation location,
    256     int line, int column) {
    257   if (source_location_table_ != NULL) {
    258     source_location_table_->Add(descriptor, location, line, column);
    259   }
    260 }
    261 
    262 void Parser::RecordLocation(
    263     const Message* descriptor,
    264     DescriptorPool::ErrorCollector::ErrorLocation location) {
    265   RecordLocation(descriptor, location,
    266                  input_->current().line, input_->current().column);
    267 }
    268 
    269 // -------------------------------------------------------------------
    270 
    271 void Parser::SkipStatement() {
    272   while (true) {
    273     if (AtEnd()) {
    274       return;
    275     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
    276       if (TryConsume(";")) {
    277         return;
    278       } else if (TryConsume("{")) {
    279         SkipRestOfBlock();
    280         return;
    281       } else if (LookingAt("}")) {
    282         return;
    283       }
    284     }
    285     input_->Next();
    286   }
    287 }
    288 
    289 void Parser::SkipRestOfBlock() {
    290   while (true) {
    291     if (AtEnd()) {
    292       return;
    293     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
    294       if (TryConsume("}")) {
    295         return;
    296       } else if (TryConsume("{")) {
    297         SkipRestOfBlock();
    298       }
    299     }
    300     input_->Next();
    301   }
    302 }
    303 
    304 // ===================================================================
    305 
    306 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
    307   input_ = input;
    308   had_errors_ = false;
    309   syntax_identifier_.clear();
    310 
    311   if (LookingAtType(io::Tokenizer::TYPE_START)) {
    312     // Advance to first token.
    313     input_->Next();
    314   }
    315 
    316   if (require_syntax_identifier_ || LookingAt("syntax")) {
    317     if (!ParseSyntaxIdentifier()) {
    318       // Don't attempt to parse the file if we didn't recognize the syntax
    319       // identifier.
    320       return false;
    321     }
    322   } else if (!stop_after_syntax_identifier_) {
    323     syntax_identifier_ = "proto2";
    324   }
    325 
    326   if (stop_after_syntax_identifier_) return !had_errors_;
    327 
    328   // Repeatedly parse statements until we reach the end of the file.
    329   while (!AtEnd()) {
    330     if (!ParseTopLevelStatement(file)) {
    331       // This statement failed to parse.  Skip it, but keep looping to parse
    332       // other statements.
    333       SkipStatement();
    334 
    335       if (LookingAt("}")) {
    336         AddError("Unmatched \"}\".");
    337         input_->Next();
    338       }
    339     }
    340   }
    341 
    342   input_ = NULL;
    343   return !had_errors_;
    344 }
    345 
    346 bool Parser::ParseSyntaxIdentifier() {
    347   DO(Consume("syntax", "File must begin with 'syntax = \"proto2\";'."));
    348   DO(Consume("="));
    349   io::Tokenizer::Token syntax_token = input_->current();
    350   string syntax;
    351   DO(ConsumeString(&syntax, "Expected syntax identifier."));
    352   DO(Consume(";"));
    353 
    354   syntax_identifier_ = syntax;
    355 
    356   if (syntax != "proto2" && !stop_after_syntax_identifier_) {
    357     AddError(syntax_token.line, syntax_token.column,
    358       "Unrecognized syntax identifier \"" + syntax + "\".  This parser "
    359       "only recognizes \"proto2\".");
    360     return false;
    361   }
    362 
    363   return true;
    364 }
    365 
    366 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file) {
    367   if (TryConsume(";")) {
    368     // empty statement; ignore
    369     return true;
    370   } else if (LookingAt("message")) {
    371     return ParseMessageDefinition(file->add_message_type());
    372   } else if (LookingAt("enum")) {
    373     return ParseEnumDefinition(file->add_enum_type());
    374   } else if (LookingAt("service")) {
    375     return ParseServiceDefinition(file->add_service());
    376   } else if (LookingAt("extend")) {
    377     return ParseExtend(file->mutable_extension(),
    378                        file->mutable_message_type());
    379   } else if (LookingAt("import")) {
    380     return ParseImport(file->add_dependency());
    381   } else if (LookingAt("package")) {
    382     return ParsePackage(file);
    383   } else if (LookingAt("option")) {
    384     return ParseOption(file->mutable_options());
    385   } else {
    386     AddError("Expected top-level statement (e.g. \"message\").");
    387     return false;
    388   }
    389 }
    390 
    391 // -------------------------------------------------------------------
    392 // Messages
    393 
    394 bool Parser::ParseMessageDefinition(DescriptorProto* message) {
    395   DO(Consume("message"));
    396   RecordLocation(message, DescriptorPool::ErrorCollector::NAME);
    397   DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
    398   DO(ParseMessageBlock(message));
    399   return true;
    400 }
    401 
    402 bool Parser::ParseMessageBlock(DescriptorProto* message) {
    403   DO(Consume("{"));
    404 
    405   while (!TryConsume("}")) {
    406     if (AtEnd()) {
    407       AddError("Reached end of input in message definition (missing '}').");
    408       return false;
    409     }
    410 
    411     if (!ParseMessageStatement(message)) {
    412       // This statement failed to parse.  Skip it, but keep looping to parse
    413       // other statements.
    414       SkipStatement();
    415     }
    416   }
    417 
    418   return true;
    419 }
    420 
    421 bool Parser::ParseMessageStatement(DescriptorProto* message) {
    422   if (TryConsume(";")) {
    423     // empty statement; ignore
    424     return true;
    425   } else if (LookingAt("message")) {
    426     return ParseMessageDefinition(message->add_nested_type());
    427   } else if (LookingAt("enum")) {
    428     return ParseEnumDefinition(message->add_enum_type());
    429   } else if (LookingAt("extensions")) {
    430     return ParseExtensions(message);
    431   } else if (LookingAt("extend")) {
    432     return ParseExtend(message->mutable_extension(),
    433                        message->mutable_nested_type());
    434   } else if (LookingAt("option")) {
    435     return ParseOption(message->mutable_options());
    436   } else {
    437     return ParseMessageField(message->add_field(),
    438                              message->mutable_nested_type());
    439   }
    440 }
    441 
    442 bool Parser::ParseMessageField(FieldDescriptorProto* field,
    443                                RepeatedPtrField<DescriptorProto>* messages) {
    444   // Parse label and type.
    445   FieldDescriptorProto::Label label;
    446   DO(ParseLabel(&label));
    447   field->set_label(label);
    448 
    449   RecordLocation(field, DescriptorPool::ErrorCollector::TYPE);
    450   FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
    451   string type_name;
    452   DO(ParseType(&type, &type_name));
    453   if (type_name.empty()) {
    454     field->set_type(type);
    455   } else {
    456     field->set_type_name(type_name);
    457   }
    458 
    459   // Parse name and '='.
    460   RecordLocation(field, DescriptorPool::ErrorCollector::NAME);
    461   io::Tokenizer::Token name_token = input_->current();
    462   DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
    463   DO(Consume("=", "Missing field number."));
    464 
    465   // Parse field number.
    466   RecordLocation(field, DescriptorPool::ErrorCollector::NUMBER);
    467   int number;
    468   DO(ConsumeInteger(&number, "Expected field number."));
    469   field->set_number(number);
    470 
    471   // Parse options.
    472   DO(ParseFieldOptions(field));
    473 
    474   // Deal with groups.
    475   if (type_name.empty() && type == FieldDescriptorProto::TYPE_GROUP) {
    476     DescriptorProto* group = messages->Add();
    477     group->set_name(field->name());
    478     // Record name location to match the field name's location.
    479     RecordLocation(group, DescriptorPool::ErrorCollector::NAME,
    480                    name_token.line, name_token.column);
    481 
    482     // As a hack for backwards-compatibility, we force the group name to start
    483     // with a capital letter and lower-case the field name.  New code should
    484     // not use groups; it should use nested messages.
    485     if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
    486       AddError(name_token.line, name_token.column,
    487         "Group names must start with a capital letter.");
    488     }
    489     LowerString(field->mutable_name());
    490 
    491     field->set_type_name(group->name());
    492     if (LookingAt("{")) {
    493       DO(ParseMessageBlock(group));
    494     } else {
    495       AddError("Missing group body.");
    496       return false;
    497     }
    498   } else {
    499     DO(Consume(";"));
    500   }
    501 
    502   return true;
    503 }
    504 
    505 bool Parser::ParseFieldOptions(FieldDescriptorProto* field) {
    506   if (!TryConsume("[")) return true;
    507 
    508   // Parse field options.
    509   do {
    510     if (LookingAt("default")) {
    511       DO(ParseDefaultAssignment(field));
    512     } else {
    513       DO(ParseOptionAssignment(field->mutable_options()));
    514     }
    515   } while (TryConsume(","));
    516 
    517   DO(Consume("]"));
    518   return true;
    519 }
    520 
    521 bool Parser::ParseDefaultAssignment(FieldDescriptorProto* field) {
    522   if (field->has_default_value()) {
    523     AddError("Already set option \"default\".");
    524     field->clear_default_value();
    525   }
    526 
    527   DO(Consume("default"));
    528   DO(Consume("="));
    529 
    530   RecordLocation(field, DescriptorPool::ErrorCollector::DEFAULT_VALUE);
    531   string* default_value = field->mutable_default_value();
    532 
    533   if (!field->has_type()) {
    534     // The field has a type name, but we don't know if it is a message or an
    535     // enum yet.  Assume an enum for now.
    536     DO(ConsumeIdentifier(default_value, "Expected identifier."));
    537     return true;
    538   }
    539 
    540   switch (field->type()) {
    541     case FieldDescriptorProto::TYPE_INT32:
    542     case FieldDescriptorProto::TYPE_INT64:
    543     case FieldDescriptorProto::TYPE_SINT32:
    544     case FieldDescriptorProto::TYPE_SINT64:
    545     case FieldDescriptorProto::TYPE_SFIXED32:
    546     case FieldDescriptorProto::TYPE_SFIXED64: {
    547       uint64 max_value = kint64max;
    548       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
    549           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
    550           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
    551         max_value = kint32max;
    552       }
    553 
    554       // These types can be negative.
    555       if (TryConsume("-")) {
    556         default_value->append("-");
    557         // Two's complement always has one more negative value than positive.
    558         ++max_value;
    559       }
    560       // Parse the integer to verify that it is not out-of-range.
    561       uint64 value;
    562       DO(ConsumeInteger64(max_value, &value, "Expected integer."));
    563       // And stringify it again.
    564       default_value->append(SimpleItoa(value));
    565       break;
    566     }
    567 
    568     case FieldDescriptorProto::TYPE_UINT32:
    569     case FieldDescriptorProto::TYPE_UINT64:
    570     case FieldDescriptorProto::TYPE_FIXED32:
    571     case FieldDescriptorProto::TYPE_FIXED64: {
    572       uint64 max_value = kuint64max;
    573       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
    574           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
    575         max_value = kuint32max;
    576       }
    577 
    578       // Numeric, not negative.
    579       if (TryConsume("-")) {
    580         AddError("Unsigned field can't have negative default value.");
    581       }
    582       // Parse the integer to verify that it is not out-of-range.
    583       uint64 value;
    584       DO(ConsumeInteger64(max_value, &value, "Expected integer."));
    585       // And stringify it again.
    586       default_value->append(SimpleItoa(value));
    587       break;
    588     }
    589 
    590     case FieldDescriptorProto::TYPE_FLOAT:
    591     case FieldDescriptorProto::TYPE_DOUBLE:
    592       // These types can be negative.
    593       if (TryConsume("-")) {
    594         default_value->append("-");
    595       }
    596       // Parse the integer because we have to convert hex integers to decimal
    597       // floats.
    598       double value;
    599       DO(ConsumeNumber(&value, "Expected number."));
    600       // And stringify it again.
    601       default_value->append(SimpleDtoa(value));
    602       break;
    603 
    604     case FieldDescriptorProto::TYPE_BOOL:
    605       if (TryConsume("true")) {
    606         default_value->assign("true");
    607       } else if (TryConsume("false")) {
    608         default_value->assign("false");
    609       } else {
    610         AddError("Expected \"true\" or \"false\".");
    611         return false;
    612       }
    613       break;
    614 
    615     case FieldDescriptorProto::TYPE_STRING:
    616       DO(ConsumeString(default_value, "Expected string."));
    617       break;
    618 
    619     case FieldDescriptorProto::TYPE_BYTES:
    620       DO(ConsumeString(default_value, "Expected string."));
    621       *default_value = CEscape(*default_value);
    622       break;
    623 
    624     case FieldDescriptorProto::TYPE_ENUM:
    625       DO(ConsumeIdentifier(default_value, "Expected identifier."));
    626       break;
    627 
    628     case FieldDescriptorProto::TYPE_MESSAGE:
    629     case FieldDescriptorProto::TYPE_GROUP:
    630       AddError("Messages can't have default values.");
    631       return false;
    632   }
    633 
    634   return true;
    635 }
    636 
    637 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option) {
    638   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
    639   string identifier;  // We parse identifiers into this string.
    640   if (LookingAt("(")) {  // This is an extension.
    641     DO(Consume("("));
    642     // An extension name consists of dot-separated identifiers, and may begin
    643     // with a dot.
    644     if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
    645       DO(ConsumeIdentifier(&identifier, "Expected identifier."));
    646       name->mutable_name_part()->append(identifier);
    647     }
    648     while (LookingAt(".")) {
    649       DO(Consume("."));
    650       name->mutable_name_part()->append(".");
    651       DO(ConsumeIdentifier(&identifier, "Expected identifier."));
    652       name->mutable_name_part()->append(identifier);
    653     }
    654     DO(Consume(")"));
    655     name->set_is_extension(true);
    656   } else {  // This is a regular field.
    657     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
    658     name->mutable_name_part()->append(identifier);
    659     name->set_is_extension(false);
    660   }
    661   return true;
    662 }
    663 
    664 // We don't interpret the option here. Instead we store it in an
    665 // UninterpretedOption, to be interpreted later.
    666 bool Parser::ParseOptionAssignment(Message* options) {
    667   // Create an entry in the uninterpreted_option field.
    668   const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()->
    669       FindFieldByName("uninterpreted_option");
    670   GOOGLE_CHECK(uninterpreted_option_field != NULL)
    671       << "No field named \"uninterpreted_option\" in the Options proto.";
    672 
    673   UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>(
    674       options->GetReflection()->AddMessage(options,
    675                                            uninterpreted_option_field));
    676 
    677   // Parse dot-separated name.
    678   RecordLocation(uninterpreted_option,
    679                  DescriptorPool::ErrorCollector::OPTION_NAME);
    680 
    681   DO(ParseOptionNamePart(uninterpreted_option));
    682 
    683   while (LookingAt(".")) {
    684     DO(Consume("."));
    685     DO(ParseOptionNamePart(uninterpreted_option));
    686   }
    687 
    688   DO(Consume("="));
    689 
    690   RecordLocation(uninterpreted_option,
    691                  DescriptorPool::ErrorCollector::OPTION_VALUE);
    692 
    693   // All values are a single token, except for negative numbers, which consist
    694   // of a single '-' symbol, followed by a positive number.
    695   bool is_negative = TryConsume("-");
    696 
    697   switch (input_->current().type) {
    698     case io::Tokenizer::TYPE_START:
    699       GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
    700       return false;
    701 
    702     case io::Tokenizer::TYPE_END:
    703       AddError("Unexpected end of stream while parsing option value.");
    704       return false;
    705 
    706     case io::Tokenizer::TYPE_IDENTIFIER: {
    707       if (is_negative) {
    708         AddError("Invalid '-' symbol before identifier.");
    709         return false;
    710       }
    711       string value;
    712       DO(ConsumeIdentifier(&value, "Expected identifier."));
    713       uninterpreted_option->set_identifier_value(value);
    714       break;
    715     }
    716 
    717     case io::Tokenizer::TYPE_INTEGER: {
    718       uint64 value;
    719       uint64 max_value =
    720           is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
    721       DO(ConsumeInteger64(max_value, &value, "Expected integer."));
    722       if (is_negative) {
    723         uninterpreted_option->set_negative_int_value(-value);
    724       } else {
    725         uninterpreted_option->set_positive_int_value(value);
    726       }
    727       break;
    728     }
    729 
    730     case io::Tokenizer::TYPE_FLOAT: {
    731       double value;
    732       DO(ConsumeNumber(&value, "Expected number."));
    733       uninterpreted_option->set_double_value(is_negative ? -value : value);
    734       break;
    735     }
    736 
    737     case io::Tokenizer::TYPE_STRING: {
    738       if (is_negative) {
    739         AddError("Invalid '-' symbol before string.");
    740         return false;
    741       }
    742       string value;
    743       DO(ConsumeString(&value, "Expected string."));
    744       uninterpreted_option->set_string_value(value);
    745       break;
    746     }
    747 
    748     case io::Tokenizer::TYPE_SYMBOL:
    749       AddError("Expected option value.");
    750       return false;
    751   }
    752 
    753   return true;
    754 }
    755 
    756 bool Parser::ParseExtensions(DescriptorProto* message) {
    757   // Parse the declaration.
    758   DO(Consume("extensions"));
    759 
    760   do {
    761     DescriptorProto::ExtensionRange* range = message->add_extension_range();
    762     RecordLocation(range, DescriptorPool::ErrorCollector::NUMBER);
    763 
    764     int start, end;
    765     DO(ConsumeInteger(&start, "Expected field number range."));
    766 
    767     if (TryConsume("to")) {
    768       if (TryConsume("max")) {
    769         end = FieldDescriptor::kMaxNumber;
    770       } else {
    771         DO(ConsumeInteger(&end, "Expected integer."));
    772       }
    773     } else {
    774       end = start;
    775     }
    776 
    777     // Users like to specify inclusive ranges, but in code we like the end
    778     // number to be exclusive.
    779     ++end;
    780 
    781     range->set_start(start);
    782     range->set_end(end);
    783   } while (TryConsume(","));
    784 
    785   DO(Consume(";"));
    786   return true;
    787 }
    788 
    789 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
    790                          RepeatedPtrField<DescriptorProto>* messages) {
    791   DO(Consume("extend"));
    792 
    793   // We expect to see at least one extension field defined in the extend block.
    794   // We need to create it now so we can record the extendee's location.
    795   FieldDescriptorProto* first_field = extensions->Add();
    796 
    797   // Parse the extendee type.
    798   RecordLocation(first_field, DescriptorPool::ErrorCollector::EXTENDEE);
    799   DO(ParseUserDefinedType(first_field->mutable_extendee()));
    800 
    801   // Parse the block.
    802   DO(Consume("{"));
    803 
    804   bool is_first = true;
    805 
    806   do {
    807     if (AtEnd()) {
    808       AddError("Reached end of input in extend definition (missing '}').");
    809       return false;
    810     }
    811 
    812     FieldDescriptorProto* field;
    813     if (is_first) {
    814       field = first_field;
    815       is_first = false;
    816     } else {
    817       field = extensions->Add();
    818       field->set_extendee(first_field->extendee());
    819     }
    820 
    821     if (!ParseMessageField(field, messages)) {
    822       // This statement failed to parse.  Skip it, but keep looping to parse
    823       // other statements.
    824       SkipStatement();
    825     }
    826   } while(!TryConsume("}"));
    827 
    828   return true;
    829 }
    830 
    831 // -------------------------------------------------------------------
    832 // Enums
    833 
    834 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type) {
    835   DO(Consume("enum"));
    836   RecordLocation(enum_type, DescriptorPool::ErrorCollector::NAME);
    837   DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
    838   DO(ParseEnumBlock(enum_type));
    839   return true;
    840 }
    841 
    842 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type) {
    843   DO(Consume("{"));
    844 
    845   while (!TryConsume("}")) {
    846     if (AtEnd()) {
    847       AddError("Reached end of input in enum definition (missing '}').");
    848       return false;
    849     }
    850 
    851     if (!ParseEnumStatement(enum_type)) {
    852       // This statement failed to parse.  Skip it, but keep looping to parse
    853       // other statements.
    854       SkipStatement();
    855     }
    856   }
    857 
    858   return true;
    859 }
    860 
    861 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type) {
    862   if (TryConsume(";")) {
    863     // empty statement; ignore
    864     return true;
    865   } else if (LookingAt("option")) {
    866     return ParseOption(enum_type->mutable_options());
    867   } else {
    868     return ParseEnumConstant(enum_type->add_value());
    869   }
    870 }
    871 
    872 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value) {
    873   RecordLocation(enum_value, DescriptorPool::ErrorCollector::NAME);
    874   DO(ConsumeIdentifier(enum_value->mutable_name(),
    875                        "Expected enum constant name."));
    876   DO(Consume("=", "Missing numeric value for enum constant."));
    877 
    878   bool is_negative = TryConsume("-");
    879   int number;
    880   DO(ConsumeInteger(&number, "Expected integer."));
    881   if (is_negative) number *= -1;
    882   enum_value->set_number(number);
    883 
    884   DO(ParseEnumConstantOptions(enum_value));
    885 
    886   DO(Consume(";"));
    887 
    888   return true;
    889 }
    890 
    891 bool Parser::ParseEnumConstantOptions(EnumValueDescriptorProto* value) {
    892   if (!TryConsume("[")) return true;
    893 
    894   do {
    895     DO(ParseOptionAssignment(value->mutable_options()));
    896   } while (TryConsume(","));
    897 
    898   DO(Consume("]"));
    899   return true;
    900 }
    901 
    902 // -------------------------------------------------------------------
    903 // Services
    904 
    905 bool Parser::ParseServiceDefinition(ServiceDescriptorProto* service) {
    906   DO(Consume("service"));
    907   RecordLocation(service, DescriptorPool::ErrorCollector::NAME);
    908   DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
    909   DO(ParseServiceBlock(service));
    910   return true;
    911 }
    912 
    913 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service) {
    914   DO(Consume("{"));
    915 
    916   while (!TryConsume("}")) {
    917     if (AtEnd()) {
    918       AddError("Reached end of input in service definition (missing '}').");
    919       return false;
    920     }
    921 
    922     if (!ParseServiceStatement(service)) {
    923       // This statement failed to parse.  Skip it, but keep looping to parse
    924       // other statements.
    925       SkipStatement();
    926     }
    927   }
    928 
    929   return true;
    930 }
    931 
    932 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service) {
    933   if (TryConsume(";")) {
    934     // empty statement; ignore
    935     return true;
    936   } else if (LookingAt("option")) {
    937     return ParseOption(service->mutable_options());
    938   } else {
    939     return ParseServiceMethod(service->add_method());
    940   }
    941 }
    942 
    943 bool Parser::ParseServiceMethod(MethodDescriptorProto* method) {
    944   DO(Consume("rpc"));
    945   RecordLocation(method, DescriptorPool::ErrorCollector::NAME);
    946   DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
    947 
    948   // Parse input type.
    949   DO(Consume("("));
    950   RecordLocation(method, DescriptorPool::ErrorCollector::INPUT_TYPE);
    951   DO(ParseUserDefinedType(method->mutable_input_type()));
    952   DO(Consume(")"));
    953 
    954   // Parse output type.
    955   DO(Consume("returns"));
    956   DO(Consume("("));
    957   RecordLocation(method, DescriptorPool::ErrorCollector::OUTPUT_TYPE);
    958   DO(ParseUserDefinedType(method->mutable_output_type()));
    959   DO(Consume(")"));
    960 
    961   if (TryConsume("{")) {
    962     // Options!
    963     while (!TryConsume("}")) {
    964       if (AtEnd()) {
    965         AddError("Reached end of input in method options (missing '}').");
    966         return false;
    967       }
    968 
    969       if (TryConsume(";")) {
    970         // empty statement; ignore
    971       } else {
    972         if (!ParseOption(method->mutable_options())) {
    973           // This statement failed to parse.  Skip it, but keep looping to
    974           // parse other statements.
    975           SkipStatement();
    976         }
    977       }
    978     }
    979   } else {
    980     DO(Consume(";"));
    981   }
    982 
    983   return true;
    984 }
    985 
    986 // -------------------------------------------------------------------
    987 
    988 bool Parser::ParseLabel(FieldDescriptorProto::Label* label) {
    989   if (TryConsume("optional")) {
    990     *label = FieldDescriptorProto::LABEL_OPTIONAL;
    991     return true;
    992   } else if (TryConsume("repeated")) {
    993     *label = FieldDescriptorProto::LABEL_REPEATED;
    994     return true;
    995   } else if (TryConsume("required")) {
    996     *label = FieldDescriptorProto::LABEL_REQUIRED;
    997     return true;
    998   } else {
    999     AddError("Expected \"required\", \"optional\", or \"repeated\".");
   1000     // We can actually reasonably recover here by just assuming the user
   1001     // forgot the label altogether.
   1002     *label = FieldDescriptorProto::LABEL_OPTIONAL;
   1003     return true;
   1004   }
   1005 }
   1006 
   1007 bool Parser::ParseType(FieldDescriptorProto::Type* type,
   1008                        string* type_name) {
   1009   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
   1010   if (iter != kTypeNames.end()) {
   1011     *type = iter->second;
   1012     input_->Next();
   1013   } else {
   1014     DO(ParseUserDefinedType(type_name));
   1015   }
   1016   return true;
   1017 }
   1018 
   1019 bool Parser::ParseUserDefinedType(string* type_name) {
   1020   type_name->clear();
   1021 
   1022   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
   1023   if (iter != kTypeNames.end()) {
   1024     // Note:  The only place enum types are allowed is for field types, but
   1025     //   if we are parsing a field type then we would not get here because
   1026     //   primitives are allowed there as well.  So this error message doesn't
   1027     //   need to account for enums.
   1028     AddError("Expected message type.");
   1029 
   1030     // Pretend to accept this type so that we can go on parsing.
   1031     *type_name = input_->current().text;
   1032     input_->Next();
   1033     return true;
   1034   }
   1035 
   1036   // A leading "." means the name is fully-qualified.
   1037   if (TryConsume(".")) type_name->append(".");
   1038 
   1039   // Consume the first part of the name.
   1040   string identifier;
   1041   DO(ConsumeIdentifier(&identifier, "Expected type name."));
   1042   type_name->append(identifier);
   1043 
   1044   // Consume more parts.
   1045   while (TryConsume(".")) {
   1046     type_name->append(".");
   1047     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
   1048     type_name->append(identifier);
   1049   }
   1050 
   1051   return true;
   1052 }
   1053 
   1054 // ===================================================================
   1055 
   1056 bool Parser::ParsePackage(FileDescriptorProto* file) {
   1057   if (file->has_package()) {
   1058     AddError("Multiple package definitions.");
   1059     // Don't append the new package to the old one.  Just replace it.  Not
   1060     // that it really matters since this is an error anyway.
   1061     file->clear_package();
   1062   }
   1063 
   1064   DO(Consume("package"));
   1065 
   1066   RecordLocation(file, DescriptorPool::ErrorCollector::NAME);
   1067 
   1068   while (true) {
   1069     string identifier;
   1070     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
   1071     file->mutable_package()->append(identifier);
   1072     if (!TryConsume(".")) break;
   1073     file->mutable_package()->append(".");
   1074   }
   1075 
   1076   DO(Consume(";"));
   1077   return true;
   1078 }
   1079 
   1080 bool Parser::ParseImport(string* import_filename) {
   1081   DO(Consume("import"));
   1082   DO(ConsumeString(import_filename,
   1083     "Expected a string naming the file to import."));
   1084   DO(Consume(";"));
   1085   return true;
   1086 }
   1087 
   1088 bool Parser::ParseOption(Message* options) {
   1089   DO(Consume("option"));
   1090   DO(ParseOptionAssignment(options));
   1091   DO(Consume(";"));
   1092   return true;
   1093 }
   1094 
   1095 // ===================================================================
   1096 
   1097 SourceLocationTable::SourceLocationTable() {}
   1098 SourceLocationTable::~SourceLocationTable() {}
   1099 
   1100 bool SourceLocationTable::Find(
   1101     const Message* descriptor,
   1102     DescriptorPool::ErrorCollector::ErrorLocation location,
   1103     int* line, int* column) const {
   1104   const pair<int, int>* result =
   1105     FindOrNull(location_map_, make_pair(descriptor, location));
   1106   if (result == NULL) {
   1107     *line   = -1;
   1108     *column = 0;
   1109     return false;
   1110   } else {
   1111     *line   = result->first;
   1112     *column = result->second;
   1113     return true;
   1114   }
   1115 }
   1116 
   1117 void SourceLocationTable::Add(
   1118     const Message* descriptor,
   1119     DescriptorPool::ErrorCollector::ErrorLocation location,
   1120     int line, int column) {
   1121   location_map_[make_pair(descriptor, location)] = make_pair(line, column);
   1122 }
   1123 
   1124 void SourceLocationTable::Clear() {
   1125   location_map_.clear();
   1126 }
   1127 
   1128 }  // namespace compiler
   1129 }  // namespace protobuf
   1130 }  // namespace google
   1131