// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton (at) google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Implements parsing of .proto files to FileDescriptorProtos.
36 37 #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__ 38 #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__ 39 40 #include <map> 41 #include <string> 42 #include <utility> 43 #include <google/protobuf/stubs/common.h> 44 #include <google/protobuf/descriptor.h> 45 #include <google/protobuf/descriptor.pb.h> 46 #include <google/protobuf/repeated_field.h> 47 #include <google/protobuf/io/tokenizer.h> 48 49 namespace google { 50 namespace protobuf { class Message; } 51 52 namespace protobuf { 53 namespace compiler { 54 55 // Defined in this file. 56 class Parser; 57 class SourceLocationTable; 58 59 // Implements parsing of protocol definitions (such as .proto files). 60 // 61 // Note that most users will be more interested in the Importer class. 62 // Parser is a lower-level class which simply converts a single .proto file 63 // to a FileDescriptorProto. It does not resolve import directives or perform 64 // many other kinds of validation needed to construct a complete 65 // FileDescriptor. 66 class LIBPROTOBUF_EXPORT Parser { 67 public: 68 Parser(); 69 ~Parser(); 70 71 // Parse the entire input and construct a FileDescriptorProto representing 72 // it. Returns true if no errors occurred, false otherwise. 73 bool Parse(io::Tokenizer* input, FileDescriptorProto* file); 74 75 // Optional fetaures: 76 77 // Requests that locations of certain definitions be recorded to the given 78 // SourceLocationTable while parsing. This can be used to look up exact line 79 // and column numbers for errors reported by DescriptorPool during validation. 80 // Set to NULL (the default) to discard source location information. 81 void RecordSourceLocationsTo(SourceLocationTable* location_table) { 82 source_location_table_ = location_table; 83 } 84 85 // Requsets that errors be recorded to the given ErrorCollector while 86 // parsing. Set to NULL (the default) to discard error messages. 
87 void RecordErrorsTo(io::ErrorCollector* error_collector) { 88 error_collector_ = error_collector; 89 } 90 91 // Returns the identifier used in the "syntax = " declaration, if one was 92 // seen during the last call to Parse(), or the empty string otherwise. 93 const string& GetSyntaxIdentifier() { return syntax_identifier_; } 94 95 // If set true, input files will be required to begin with a syntax 96 // identifier. Otherwise, files may omit this. If a syntax identifier 97 // is provided, it must be 'syntax = "proto2";' and must appear at the 98 // top of this file regardless of whether or not it was required. 99 void SetRequireSyntaxIdentifier(bool value) { 100 require_syntax_identifier_ = value; 101 } 102 103 // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop 104 // parsing as soon as it has seen the syntax identifier, or lack thereof. 105 // This is useful for quickly identifying the syntax of the file without 106 // parsing the whole thing. If this is enabled, no error will be recorded 107 // if the syntax identifier is something other than "proto2" (since 108 // presumably the caller intends to deal with that), but other kinds of 109 // errors (e.g. parse errors) will still be reported. When this is enabled, 110 // you may pass a NULL FileDescriptorProto to Parse(). 111 void SetStopAfterSyntaxIdentifier(bool value) { 112 stop_after_syntax_identifier_ = value; 113 } 114 115 private: 116 // ================================================================= 117 // Error recovery helpers 118 119 // Consume the rest of the current statement. This consumes tokens 120 // until it sees one of: 121 // ';' Consumes the token and returns. 122 // '{' Consumes the brace then calls SkipRestOfBlock(). 123 // '}' Returns without consuming. 124 // EOF Returns (can't consume). 125 // The Parser often calls SkipStatement() after encountering a syntax 126 // error. 
This allows it to go on parsing the following lines, allowing 127 // it to report more than just one error in the file. 128 void SkipStatement(); 129 130 // Consume the rest of the current block, including nested blocks, 131 // ending after the closing '}' is encountered and consumed, or at EOF. 132 void SkipRestOfBlock(); 133 134 // ----------------------------------------------------------------- 135 // Single-token consuming helpers 136 // 137 // These make parsing code more readable. 138 139 // True if the current token is TYPE_END. 140 inline bool AtEnd(); 141 142 // True if the next token matches the given text. 143 inline bool LookingAt(const char* text); 144 // True if the next token is of the given type. 145 inline bool LookingAtType(io::Tokenizer::TokenType token_type); 146 147 // If the next token exactly matches the text given, consume it and return 148 // true. Otherwise, return false without logging an error. 149 bool TryConsume(const char* text); 150 151 // These attempt to read some kind of token from the input. If successful, 152 // they return true. Otherwise they return false and add the given error 153 // to the error list. 154 155 // Consume a token with the exact text given. 156 bool Consume(const char* text, const char* error); 157 // Same as above, but automatically generates the error "Expected \"text\".", 158 // where "text" is the expected token text. 159 bool Consume(const char* text); 160 // Consume a token of type IDENTIFIER and store its text in "output". 161 bool ConsumeIdentifier(string* output, const char* error); 162 // Consume an integer and store its value in "output". 163 bool ConsumeInteger(int* output, const char* error); 164 // Consume a 64-bit integer and store its value in "output". If the value 165 // is greater than max_value, an error will be reported. 166 bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error); 167 // Consume a number and store its value in "output". 
This will accept 168 // tokens of either INTEGER or FLOAT type. 169 bool ConsumeNumber(double* output, const char* error); 170 // Consume a string literal and store its (unescaped) value in "output". 171 bool ConsumeString(string* output, const char* error); 172 173 // ----------------------------------------------------------------- 174 // Error logging helpers 175 176 // Invokes error_collector_->AddError(), if error_collector_ is not NULL. 177 void AddError(int line, int column, const string& error); 178 179 // Invokes error_collector_->AddError() with the line and column number 180 // of the current token. 181 void AddError(const string& error); 182 183 // Record the given line and column and associate it with this descriptor 184 // in the SourceLocationTable. 185 void RecordLocation(const Message* descriptor, 186 DescriptorPool::ErrorCollector::ErrorLocation location, 187 int line, int column); 188 189 // Record the current line and column and associate it with this descriptor 190 // in the SourceLocationTable. 191 void RecordLocation(const Message* descriptor, 192 DescriptorPool::ErrorCollector::ErrorLocation location); 193 194 // ================================================================= 195 // Parsers for various language constructs 196 197 // Parses the "syntax = \"proto2\";" line at the top of the file. Returns 198 // false if it failed to parse or if the syntax identifier was not 199 // recognized. 200 bool ParseSyntaxIdentifier(); 201 202 // These methods parse various individual bits of code. They return 203 // false if they completely fail to parse the construct. In this case, 204 // it is probably necessary to skip the rest of the statement to recover. 205 // However, if these methods return true, it does NOT mean that there 206 // were no errors; only that there were no *syntax* errors. 
For instance, 207 // if a service method is defined using proper syntax but uses a primitive 208 // type as its input or output, ParseMethodField() still returns true 209 // and only reports the error by calling AddError(). In practice, this 210 // makes logic much simpler for the caller. 211 212 // Parse a top-level message, enum, service, etc. 213 bool ParseTopLevelStatement(FileDescriptorProto* file); 214 215 // Parse various language high-level language construrcts. 216 bool ParseMessageDefinition(DescriptorProto* message); 217 bool ParseEnumDefinition(EnumDescriptorProto* enum_type); 218 bool ParseServiceDefinition(ServiceDescriptorProto* service); 219 bool ParsePackage(FileDescriptorProto* file); 220 bool ParseImport(string* import_filename); 221 bool ParseOption(Message* options); 222 223 // These methods parse the contents of a message, enum, or service type and 224 // add them to the given object. They consume the entire block including 225 // the beginning and ending brace. 226 bool ParseMessageBlock(DescriptorProto* message); 227 bool ParseEnumBlock(EnumDescriptorProto* enum_type); 228 bool ParseServiceBlock(ServiceDescriptorProto* service); 229 230 // Parse one statement within a message, enum, or service block, inclunding 231 // final semicolon. 232 bool ParseMessageStatement(DescriptorProto* message); 233 bool ParseEnumStatement(EnumDescriptorProto* message); 234 bool ParseServiceStatement(ServiceDescriptorProto* message); 235 236 // Parse a field of a message. If the field is a group, its type will be 237 // added to "messages". 238 bool ParseMessageField(FieldDescriptorProto* field, 239 RepeatedPtrField<DescriptorProto>* messages); 240 241 // Parse an "extensions" declaration. 242 bool ParseExtensions(DescriptorProto* message); 243 244 // Parse an "extend" declaration. 245 bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions, 246 RepeatedPtrField<DescriptorProto>* messages); 247 248 // Parse a single enum value within an enum block. 
249 bool ParseEnumConstant(EnumValueDescriptorProto* enum_value); 250 251 // Parse enum constant options, i.e. the list in square brackets at the end 252 // of the enum constant value definition. 253 bool ParseEnumConstantOptions(EnumValueDescriptorProto* value); 254 255 // Parse a single method within a service definition. 256 bool ParseServiceMethod(MethodDescriptorProto* method); 257 258 // Parse "required", "optional", or "repeated" and fill in "label" 259 // with the value. 260 bool ParseLabel(FieldDescriptorProto::Label* label); 261 262 // Parse a type name and fill in "type" (if it is a primitive) or 263 // "type_name" (if it is not) with the type parsed. 264 bool ParseType(FieldDescriptorProto::Type* type, 265 string* type_name); 266 // Parse a user-defined type and fill in "type_name" with the name. 267 // If a primitive type is named, it is treated as an error. 268 bool ParseUserDefinedType(string* type_name); 269 270 // Parses field options, i.e. the stuff in square brackets at the end 271 // of a field definition. Also parses default value. 272 bool ParseFieldOptions(FieldDescriptorProto* field); 273 274 // Parse the "default" option. This needs special handling because its 275 // type is the field's type. 276 bool ParseDefaultAssignment(FieldDescriptorProto* field); 277 278 // Parse a single option name/value pair, e.g. "ctype = CORD". The name 279 // identifies a field of the given Message, and the value of that field 280 // is set to the parsed value. 281 bool ParseOptionAssignment(Message* options); 282 283 // Parses a single part of a multipart option name. A multipart name consists 284 // of names separated by dots. Each name is either an identifier or a series 285 // of identifiers separated by dots and enclosed in parentheses. E.g., 286 // "foo.(bar.baz).qux". 
287 bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option); 288 289 // ================================================================= 290 291 io::Tokenizer* input_; 292 io::ErrorCollector* error_collector_; 293 SourceLocationTable* source_location_table_; 294 bool had_errors_; 295 bool require_syntax_identifier_; 296 bool stop_after_syntax_identifier_; 297 string syntax_identifier_; 298 299 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser); 300 }; 301 302 // A table mapping (descriptor, ErrorLocation) pairs -- as reported by 303 // DescriptorPool when validating descriptors -- to line and column numbers 304 // within the original source code. 305 class LIBPROTOBUF_EXPORT SourceLocationTable { 306 public: 307 SourceLocationTable(); 308 ~SourceLocationTable(); 309 310 // Finds the precise location of the given error and fills in *line and 311 // *column with the line and column numbers. If not found, sets *line to 312 // -1 and *column to 0 (since line = -1 is used to mean "error has no exact 313 // location" in the ErrorCollector interface). Returns true if found, false 314 // otherwise. 315 bool Find(const Message* descriptor, 316 DescriptorPool::ErrorCollector::ErrorLocation location, 317 int* line, int* column) const; 318 319 // Adds a location to the table. 320 void Add(const Message* descriptor, 321 DescriptorPool::ErrorCollector::ErrorLocation location, 322 int line, int column); 323 324 // Clears the contents of the table. 325 void Clear(); 326 327 private: 328 typedef map< 329 pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>, 330 pair<int, int> > LocationMap; 331 LocationMap location_map_; 332 }; 333 334 } // namespace compiler 335 } // namespace protobuf 336 337 } // namespace google 338 #endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__ 339