// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: jschorr (at) google.com (Joseph Schorr)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// Utilities for printing and parsing protocol messages in a human-readable,
// text-based format.
37 38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 40 41 #include <map> 42 #include <memory> 43 #include <string> 44 #include <vector> 45 46 #include <google/protobuf/stubs/common.h> 47 #include <google/protobuf/descriptor.h> 48 #include <google/protobuf/message.h> 49 50 namespace google { 51 namespace protobuf { 52 53 namespace io { 54 class ErrorCollector; // tokenizer.h 55 } 56 57 // This class implements protocol buffer text format. Printing and parsing 58 // protocol messages in text format is useful for debugging and human editing 59 // of messages. 60 // 61 // This class is really a namespace that contains only static methods. 62 class LIBPROTOBUF_EXPORT TextFormat { 63 public: 64 // Outputs a textual representation of the given message to the given 65 // output stream. 66 static bool Print(const Message& message, io::ZeroCopyOutputStream* output); 67 68 // Print the fields in an UnknownFieldSet. They are printed by tag number 69 // only. Embedded messages are heuristically identified by attempting to 70 // parse them. 71 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 72 io::ZeroCopyOutputStream* output); 73 74 // Like Print(), but outputs directly to a string. 75 static bool PrintToString(const Message& message, string* output); 76 77 // Like PrintUnknownFields(), but outputs directly to a string. 78 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 79 string* output); 80 81 // Outputs a textual representation of the value of the field supplied on 82 // the message supplied. For non-repeated fields, an index of -1 must 83 // be supplied. Note that this method will print the default value for a 84 // field if it is not set. 85 static void PrintFieldValueToString(const Message& message, 86 const FieldDescriptor* field, 87 int index, 88 string* output); 89 90 // The default printer that converts scalar values from fields into 91 // their string representation. 
92 // You can derive from this FieldValuePrinter if you want to have 93 // fields to be printed in a different way and register it at the 94 // Printer. 95 class LIBPROTOBUF_EXPORT FieldValuePrinter { 96 public: 97 FieldValuePrinter(); 98 virtual ~FieldValuePrinter(); 99 virtual string PrintBool(bool val) const; 100 virtual string PrintInt32(int32 val) const; 101 virtual string PrintUInt32(uint32 val) const; 102 virtual string PrintInt64(int64 val) const; 103 virtual string PrintUInt64(uint64 val) const; 104 virtual string PrintFloat(float val) const; 105 virtual string PrintDouble(double val) const; 106 virtual string PrintString(const string& val) const; 107 virtual string PrintBytes(const string& val) const; 108 virtual string PrintEnum(int32 val, const string& name) const; 109 virtual string PrintFieldName(const Message& message, 110 const Reflection* reflection, 111 const FieldDescriptor* field) const; 112 virtual string PrintMessageStart(const Message& message, 113 int field_index, 114 int field_count, 115 bool single_line_mode) const; 116 virtual string PrintMessageEnd(const Message& message, 117 int field_index, 118 int field_count, 119 bool single_line_mode) const; 120 121 private: 122 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter); 123 }; 124 125 // Class for those users which require more fine-grained control over how 126 // a protobuffer message is printed out. 
127 class LIBPROTOBUF_EXPORT Printer { 128 public: 129 Printer(); 130 ~Printer(); 131 132 // Like TextFormat::Print 133 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const; 134 // Like TextFormat::PrintUnknownFields 135 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 136 io::ZeroCopyOutputStream* output) const; 137 // Like TextFormat::PrintToString 138 bool PrintToString(const Message& message, string* output) const; 139 // Like TextFormat::PrintUnknownFieldsToString 140 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 141 string* output) const; 142 // Like TextFormat::PrintFieldValueToString 143 void PrintFieldValueToString(const Message& message, 144 const FieldDescriptor* field, 145 int index, 146 string* output) const; 147 148 // Adjust the initial indent level of all output. Each indent level is 149 // equal to two spaces. 150 void SetInitialIndentLevel(int indent_level) { 151 initial_indent_level_ = indent_level; 152 } 153 154 // If printing in single line mode, then the entire message will be output 155 // on a single line with no line breaks. 156 void SetSingleLineMode(bool single_line_mode) { 157 single_line_mode_ = single_line_mode; 158 } 159 160 bool IsInSingleLineMode() { 161 return single_line_mode_; 162 } 163 164 // If use_field_number is true, uses field number instead of field name. 165 void SetUseFieldNumber(bool use_field_number) { 166 use_field_number_ = use_field_number; 167 } 168 169 // Set true to print repeated primitives in a format like: 170 // field_name: [1, 2, 3, 4] 171 // instead of printing each value on its own line. Short format applies 172 // only to primitive values -- i.e. everything except strings and 173 // sub-messages/groups. 174 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) { 175 use_short_repeated_primitives_ = use_short_repeated_primitives; 176 } 177 178 // Set true to output UTF-8 instead of ASCII. 
The only difference 179 // is that bytes >= 0x80 in string fields will not be escaped, 180 // because they are assumed to be part of UTF-8 multi-byte 181 // sequences. This will change the default FieldValuePrinter. 182 void SetUseUtf8StringEscaping(bool as_utf8); 183 184 // Set the default FieldValuePrinter that is used for all fields that 185 // don't have a field-specific printer registered. 186 // Takes ownership of the printer. 187 void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer); 188 189 // Sets whether we want to hide unknown fields or not. 190 // Usually unknown fields are printed in a generic way that includes the 191 // tag number of the field instead of field name. However, sometimes it 192 // is useful to be able to print the message without unknown fields (e.g. 193 // for the python protobuf version to maintain consistency between its pure 194 // python and c++ implementations). 195 void SetHideUnknownFields(bool hide) { 196 hide_unknown_fields_ = hide; 197 } 198 199 // If print_message_fields_in_index_order is true, print fields of a proto 200 // message using the order defined in source code instead of the field 201 // number. By default, use the field number order. 202 void SetPrintMessageFieldsInIndexOrder( 203 bool print_message_fields_in_index_order) { 204 print_message_fields_in_index_order_ = 205 print_message_fields_in_index_order; 206 } 207 208 // Register a custom field-specific FieldValuePrinter for fields 209 // with a particular FieldDescriptor. 210 // Returns "true" if the registration succeeded, or "false", if there is 211 // already a printer for that FieldDescriptor. 212 // Takes ownership of the printer on successful registration. 213 bool RegisterFieldValuePrinter(const FieldDescriptor* field, 214 const FieldValuePrinter* printer); 215 216 private: 217 // Forward declaration of an internal class used to print the text 218 // output to the OutputStream (see text_format.cc for implementation). 
219 class TextGenerator; 220 221 // Internal Print method, used for writing to the OutputStream via 222 // the TextGenerator class. 223 void Print(const Message& message, 224 TextGenerator& generator) const; 225 226 // Print a single field. 227 void PrintField(const Message& message, 228 const Reflection* reflection, 229 const FieldDescriptor* field, 230 TextGenerator& generator) const; 231 232 // Print a repeated primitive field in short form. 233 void PrintShortRepeatedField(const Message& message, 234 const Reflection* reflection, 235 const FieldDescriptor* field, 236 TextGenerator& generator) const; 237 238 // Print the name of a field -- i.e. everything that comes before the 239 // ':' for a single name/value pair. 240 void PrintFieldName(const Message& message, 241 const Reflection* reflection, 242 const FieldDescriptor* field, 243 TextGenerator& generator) const; 244 245 // Outputs a textual representation of the value of the field supplied on 246 // the message supplied or the default value if not set. 247 void PrintFieldValue(const Message& message, 248 const Reflection* reflection, 249 const FieldDescriptor* field, 250 int index, 251 TextGenerator& generator) const; 252 253 // Print the fields in an UnknownFieldSet. They are printed by tag number 254 // only. Embedded messages are heuristically identified by attempting to 255 // parse them. 
256 void PrintUnknownFields(const UnknownFieldSet& unknown_fields, 257 TextGenerator& generator) const; 258 259 int initial_indent_level_; 260 261 bool single_line_mode_; 262 263 bool use_field_number_; 264 265 bool use_short_repeated_primitives_; 266 267 bool hide_unknown_fields_; 268 269 bool print_message_fields_in_index_order_; 270 271 scoped_ptr<const FieldValuePrinter> default_field_value_printer_; 272 typedef map<const FieldDescriptor*, 273 const FieldValuePrinter*> CustomPrinterMap; 274 CustomPrinterMap custom_printers_; 275 }; 276 277 // Parses a text-format protocol message from the given input stream to 278 // the given message object. This function parses the format written 279 // by Print(). 280 static bool Parse(io::ZeroCopyInputStream* input, Message* output); 281 // Like Parse(), but reads directly from a string. 282 static bool ParseFromString(const string& input, Message* output); 283 284 // Like Parse(), but the data is merged into the given message, as if 285 // using Message::MergeFrom(). 286 static bool Merge(io::ZeroCopyInputStream* input, Message* output); 287 // Like Merge(), but reads directly from a string. 288 static bool MergeFromString(const string& input, Message* output); 289 290 // Parse the given text as a single field value and store it into the 291 // given field of the given message. If the field is a repeated field, 292 // the new value will be added to the end 293 static bool ParseFieldValueFromString(const string& input, 294 const FieldDescriptor* field, 295 Message* message); 296 297 // Interface that TextFormat::Parser can use to find extensions. 298 // This class may be extended in the future to find more information 299 // like fields, etc. 300 class LIBPROTOBUF_EXPORT Finder { 301 public: 302 virtual ~Finder(); 303 304 // Try to find an extension of *message by fully-qualified field 305 // name. Returns NULL if no extension is known for this name or number. 
306 virtual const FieldDescriptor* FindExtension( 307 Message* message, 308 const string& name) const = 0; 309 }; 310 311 // A location in the parsed text. 312 struct ParseLocation { 313 int line; 314 int column; 315 316 ParseLocation() : line(-1), column(-1) {} 317 ParseLocation(int line_param, int column_param) 318 : line(line_param), column(column_param) {} 319 }; 320 321 // Data structure which is populated with the locations of each field 322 // value parsed from the text. 323 class LIBPROTOBUF_EXPORT ParseInfoTree { 324 public: 325 ParseInfoTree(); 326 ~ParseInfoTree(); 327 328 // Returns the parse location for index-th value of the field in the parsed 329 // text. If none exists, returns a location with line = -1. Index should be 330 // -1 for not-repeated fields. 331 ParseLocation GetLocation(const FieldDescriptor* field, int index) const; 332 333 // Returns the parse info tree for the given field, which must be a message 334 // type. The nested information tree is owned by the root tree and will be 335 // deleted when it is deleted. 336 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field, 337 int index) const; 338 339 private: 340 // Allow the text format parser to record information into the tree. 341 friend class TextFormat; 342 343 // Records the starting location of a single value for a field. 344 void RecordLocation(const FieldDescriptor* field, ParseLocation location); 345 346 // Create and records a nested tree for a nested message field. 347 ParseInfoTree* CreateNested(const FieldDescriptor* field); 348 349 // Defines the map from the index-th field descriptor to its parse location. 350 typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap; 351 352 // Defines the map from the index-th field descriptor to the nested parse 353 // info tree. 
354 typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap; 355 356 LocationMap locations_; 357 NestedMap nested_; 358 359 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree); 360 }; 361 362 // For more control over parsing, use this class. 363 class LIBPROTOBUF_EXPORT Parser { 364 public: 365 Parser(); 366 ~Parser(); 367 368 // Like TextFormat::Parse(). 369 bool Parse(io::ZeroCopyInputStream* input, Message* output); 370 // Like TextFormat::ParseFromString(). 371 bool ParseFromString(const string& input, Message* output); 372 // Like TextFormat::Merge(). 373 bool Merge(io::ZeroCopyInputStream* input, Message* output); 374 // Like TextFormat::MergeFromString(). 375 bool MergeFromString(const string& input, Message* output); 376 377 // Set where to report parse errors. If NULL (the default), errors will 378 // be printed to stderr. 379 void RecordErrorsTo(io::ErrorCollector* error_collector) { 380 error_collector_ = error_collector; 381 } 382 383 // Set how parser finds extensions. If NULL (the default), the 384 // parser will use the standard Reflection object associated with 385 // the message being parsed. 386 void SetFinder(Finder* finder) { 387 finder_ = finder; 388 } 389 390 // Sets where location information about the parse will be written. If NULL 391 // (the default), then no location will be written. 392 void WriteLocationsTo(ParseInfoTree* tree) { 393 parse_info_tree_ = tree; 394 } 395 396 // Normally parsing fails if, after parsing, output->IsInitialized() 397 // returns false. Call AllowPartialMessage(true) to skip this check. 398 void AllowPartialMessage(bool allow) { 399 allow_partial_ = allow; 400 } 401 402 // Allow field names to be matched case-insensitively. 403 // This is not advisable if there are fields that only differ in case, or 404 // if you want to enforce writing in the canonical form. 405 // This is 'false' by default. 
406 void AllowCaseInsensitiveField(bool allow) { 407 allow_case_insensitive_field_ = allow; 408 } 409 410 // Like TextFormat::ParseFieldValueFromString 411 bool ParseFieldValueFromString(const string& input, 412 const FieldDescriptor* field, 413 Message* output); 414 415 416 void AllowFieldNumber(bool allow) { 417 allow_field_number_ = allow; 418 } 419 420 private: 421 // Forward declaration of an internal class used to parse text 422 // representations (see text_format.cc for implementation). 423 class ParserImpl; 424 425 // Like TextFormat::Merge(). The provided implementation is used 426 // to do the parsing. 427 bool MergeUsingImpl(io::ZeroCopyInputStream* input, 428 Message* output, 429 ParserImpl* parser_impl); 430 431 io::ErrorCollector* error_collector_; 432 Finder* finder_; 433 ParseInfoTree* parse_info_tree_; 434 bool allow_partial_; 435 bool allow_case_insensitive_field_; 436 bool allow_unknown_field_; 437 bool allow_unknown_enum_; 438 bool allow_field_number_; 439 bool allow_relaxed_whitespace_; 440 bool allow_singular_overwrites_; 441 }; 442 443 444 private: 445 // Hack: ParseInfoTree declares TextFormat as a friend which should extend 446 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some 447 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide 448 // helpers for ParserImpl to call methods of ParseInfoTree. 
449 static inline void RecordLocation(ParseInfoTree* info_tree, 450 const FieldDescriptor* field, 451 ParseLocation location); 452 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree, 453 const FieldDescriptor* field); 454 455 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat); 456 }; 457 458 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree, 459 const FieldDescriptor* field, 460 ParseLocation location) { 461 info_tree->RecordLocation(field, location); 462 } 463 464 465 inline TextFormat::ParseInfoTree* TextFormat::CreateNested( 466 ParseInfoTree* info_tree, const FieldDescriptor* field) { 467 return info_tree->CreateNested(field); 468 } 469 470 } // namespace protobuf 471 472 } // namespace google 473 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 474