// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: jschorr (at) google.com (Joseph Schorr)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Utilities for printing and parsing protocol messages in a human-readable,
// text-based format.
37 38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 40 41 #include <map> 42 #include <memory> 43 #ifndef _SHARED_PTR_H 44 #include <google/protobuf/stubs/shared_ptr.h> 45 #endif 46 #include <string> 47 #include <vector> 48 49 #include <google/protobuf/stubs/common.h> 50 #include <google/protobuf/descriptor.h> 51 #include <google/protobuf/message.h> 52 53 namespace google { 54 namespace protobuf { 55 56 namespace io { 57 class ErrorCollector; // tokenizer.h 58 } 59 60 // This class implements protocol buffer text format. Printing and parsing 61 // protocol messages in text format is useful for debugging and human editing 62 // of messages. 63 // 64 // This class is really a namespace that contains only static methods. 65 class LIBPROTOBUF_EXPORT TextFormat { 66 public: 67 // Outputs a textual representation of the given message to the given 68 // output stream. 69 static bool Print(const Message& message, io::ZeroCopyOutputStream* output); 70 71 // Print the fields in an UnknownFieldSet. They are printed by tag number 72 // only. Embedded messages are heuristically identified by attempting to 73 // parse them. 74 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 75 io::ZeroCopyOutputStream* output); 76 77 // Like Print(), but outputs directly to a string. 78 static bool PrintToString(const Message& message, string* output); 79 80 // Like PrintUnknownFields(), but outputs directly to a string. 81 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 82 string* output); 83 84 // Outputs a textual representation of the value of the field supplied on 85 // the message supplied. For non-repeated fields, an index of -1 must 86 // be supplied. Note that this method will print the default value for a 87 // field if it is not set. 
88 static void PrintFieldValueToString(const Message& message, 89 const FieldDescriptor* field, 90 int index, 91 string* output); 92 93 // The default printer that converts scalar values from fields into 94 // their string representation. 95 // You can derive from this FieldValuePrinter if you want to have 96 // fields to be printed in a different way and register it at the 97 // Printer. 98 class LIBPROTOBUF_EXPORT FieldValuePrinter { 99 public: 100 FieldValuePrinter(); 101 virtual ~FieldValuePrinter(); 102 virtual string PrintBool(bool val) const; 103 virtual string PrintInt32(int32 val) const; 104 virtual string PrintUInt32(uint32 val) const; 105 virtual string PrintInt64(int64 val) const; 106 virtual string PrintUInt64(uint64 val) const; 107 virtual string PrintFloat(float val) const; 108 virtual string PrintDouble(double val) const; 109 virtual string PrintString(const string& val) const; 110 virtual string PrintBytes(const string& val) const; 111 virtual string PrintEnum(int32 val, const string& name) const; 112 virtual string PrintFieldName(const Message& message, 113 const Reflection* reflection, 114 const FieldDescriptor* field) const; 115 virtual string PrintMessageStart(const Message& message, 116 int field_index, 117 int field_count, 118 bool single_line_mode) const; 119 virtual string PrintMessageEnd(const Message& message, 120 int field_index, 121 int field_count, 122 bool single_line_mode) const; 123 124 private: 125 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter); 126 }; 127 128 // Class for those users which require more fine-grained control over how 129 // a protobuffer message is printed out. 
130 class LIBPROTOBUF_EXPORT Printer { 131 public: 132 Printer(); 133 ~Printer(); 134 135 // Like TextFormat::Print 136 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const; 137 // Like TextFormat::PrintUnknownFields 138 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 139 io::ZeroCopyOutputStream* output) const; 140 // Like TextFormat::PrintToString 141 bool PrintToString(const Message& message, string* output) const; 142 // Like TextFormat::PrintUnknownFieldsToString 143 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 144 string* output) const; 145 // Like TextFormat::PrintFieldValueToString 146 void PrintFieldValueToString(const Message& message, 147 const FieldDescriptor* field, 148 int index, 149 string* output) const; 150 151 // Adjust the initial indent level of all output. Each indent level is 152 // equal to two spaces. 153 void SetInitialIndentLevel(int indent_level) { 154 initial_indent_level_ = indent_level; 155 } 156 157 // If printing in single line mode, then the entire message will be output 158 // on a single line with no line breaks. 159 void SetSingleLineMode(bool single_line_mode) { 160 single_line_mode_ = single_line_mode; 161 } 162 163 bool IsInSingleLineMode() { 164 return single_line_mode_; 165 } 166 167 // If use_field_number is true, uses field number instead of field name. 168 void SetUseFieldNumber(bool use_field_number) { 169 use_field_number_ = use_field_number; 170 } 171 172 // Set true to print repeated primitives in a format like: 173 // field_name: [1, 2, 3, 4] 174 // instead of printing each value on its own line. Short format applies 175 // only to primitive values -- i.e. everything except strings and 176 // sub-messages/groups. 177 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) { 178 use_short_repeated_primitives_ = use_short_repeated_primitives; 179 } 180 181 // Set true to output UTF-8 instead of ASCII. 
The only difference 182 // is that bytes >= 0x80 in string fields will not be escaped, 183 // because they are assumed to be part of UTF-8 multi-byte 184 // sequences. This will change the default FieldValuePrinter. 185 void SetUseUtf8StringEscaping(bool as_utf8); 186 187 // Set the default FieldValuePrinter that is used for all fields that 188 // don't have a field-specific printer registered. 189 // Takes ownership of the printer. 190 void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer); 191 192 // Sets whether we want to hide unknown fields or not. 193 // Usually unknown fields are printed in a generic way that includes the 194 // tag number of the field instead of field name. However, sometimes it 195 // is useful to be able to print the message without unknown fields (e.g. 196 // for the python protobuf version to maintain consistency between its pure 197 // python and c++ implementations). 198 void SetHideUnknownFields(bool hide) { 199 hide_unknown_fields_ = hide; 200 } 201 202 // If print_message_fields_in_index_order is true, print fields of a proto 203 // message using the order defined in source code instead of the field 204 // number. By default, use the field number order. 205 void SetPrintMessageFieldsInIndexOrder( 206 bool print_message_fields_in_index_order) { 207 print_message_fields_in_index_order_ = 208 print_message_fields_in_index_order; 209 } 210 211 // If expand==true, expand google.protobuf.Any payloads. The output 212 // will be of form 213 // [type_url] { <value_printed_in_text> } 214 // 215 // If expand==false, print Any using the default printer. The output will 216 // look like 217 // type_url: "<type_url>" value: "serialized_content" 218 void SetExpandAny(bool expand) { 219 expand_any_ = expand; 220 } 221 222 // If non-zero, we truncate all string fields that are longer than this 223 // threshold. This is useful when the proto message has very long strings, 224 // e.g., dump of encoded image file. 
225 // 226 // NOTE(hfgong): Setting a non-zero value breaks round-trip safe 227 // property of TextFormat::Printer. That is, from the printed message, we 228 // cannot fully recover the original string field any more. 229 void SetTruncateStringFieldLongerThan( 230 const int64 truncate_string_field_longer_than) { 231 truncate_string_field_longer_than_ = truncate_string_field_longer_than; 232 } 233 234 // Register a custom field-specific FieldValuePrinter for fields 235 // with a particular FieldDescriptor. 236 // Returns "true" if the registration succeeded, or "false", if there is 237 // already a printer for that FieldDescriptor. 238 // Takes ownership of the printer on successful registration. 239 bool RegisterFieldValuePrinter(const FieldDescriptor* field, 240 const FieldValuePrinter* printer); 241 242 private: 243 // Forward declaration of an internal class used to print the text 244 // output to the OutputStream (see text_format.cc for implementation). 245 class TextGenerator; 246 247 // Internal Print method, used for writing to the OutputStream via 248 // the TextGenerator class. 249 void Print(const Message& message, 250 TextGenerator& generator) const; 251 252 // Print a single field. 253 void PrintField(const Message& message, 254 const Reflection* reflection, 255 const FieldDescriptor* field, 256 TextGenerator& generator) const; 257 258 // Print a repeated primitive field in short form. 259 void PrintShortRepeatedField(const Message& message, 260 const Reflection* reflection, 261 const FieldDescriptor* field, 262 TextGenerator& generator) const; 263 264 // Print the name of a field -- i.e. everything that comes before the 265 // ':' for a single name/value pair. 266 void PrintFieldName(const Message& message, 267 const Reflection* reflection, 268 const FieldDescriptor* field, 269 TextGenerator& generator) const; 270 271 // Outputs a textual representation of the value of the field supplied on 272 // the message supplied or the default value if not set. 
273 void PrintFieldValue(const Message& message, 274 const Reflection* reflection, 275 const FieldDescriptor* field, 276 int index, 277 TextGenerator& generator) const; 278 279 // Print the fields in an UnknownFieldSet. They are printed by tag number 280 // only. Embedded messages are heuristically identified by attempting to 281 // parse them. 282 void PrintUnknownFields(const UnknownFieldSet& unknown_fields, 283 TextGenerator& generator) const; 284 285 bool PrintAny(const Message& message, TextGenerator& generator) const; 286 287 int initial_indent_level_; 288 289 bool single_line_mode_; 290 291 bool use_field_number_; 292 293 bool use_short_repeated_primitives_; 294 295 bool hide_unknown_fields_; 296 297 bool print_message_fields_in_index_order_; 298 299 bool expand_any_; 300 301 int64 truncate_string_field_longer_than_; 302 303 google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_; 304 typedef map<const FieldDescriptor*, 305 const FieldValuePrinter*> CustomPrinterMap; 306 CustomPrinterMap custom_printers_; 307 }; 308 309 // Parses a text-format protocol message from the given input stream to 310 // the given message object. This function parses the human-readable format 311 // written by Print(). Returns true on success. The message is cleared first, 312 // even if the function fails -- See Merge() to avoid this behavior. 313 // 314 // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}" 315 // 316 // One use for this function is parsing handwritten strings in test code. 317 // Another use is to parse the output from google::protobuf::Message::DebugString() 318 // (or ShortDebugString()), because these functions output using 319 // google::protobuf::TextFormat::Print(). 320 // 321 // If you would like to read a protocol buffer serialized in the 322 // (non-human-readable) binary wire format, see 323 // google::protobuf::MessageLite::ParseFromString(). 
324 static bool Parse(io::ZeroCopyInputStream* input, Message* output); 325 // Like Parse(), but reads directly from a string. 326 static bool ParseFromString(const string& input, Message* output); 327 328 // Like Parse(), but the data is merged into the given message, as if 329 // using Message::MergeFrom(). 330 static bool Merge(io::ZeroCopyInputStream* input, Message* output); 331 // Like Merge(), but reads directly from a string. 332 static bool MergeFromString(const string& input, Message* output); 333 334 // Parse the given text as a single field value and store it into the 335 // given field of the given message. If the field is a repeated field, 336 // the new value will be added to the end 337 static bool ParseFieldValueFromString(const string& input, 338 const FieldDescriptor* field, 339 Message* message); 340 341 // Interface that TextFormat::Parser can use to find extensions. 342 // This class may be extended in the future to find more information 343 // like fields, etc. 344 class LIBPROTOBUF_EXPORT Finder { 345 public: 346 virtual ~Finder(); 347 348 // Try to find an extension of *message by fully-qualified field 349 // name. Returns NULL if no extension is known for this name or number. 350 virtual const FieldDescriptor* FindExtension( 351 Message* message, 352 const string& name) const = 0; 353 }; 354 355 // A location in the parsed text. 356 struct ParseLocation { 357 int line; 358 int column; 359 360 ParseLocation() : line(-1), column(-1) {} 361 ParseLocation(int line_param, int column_param) 362 : line(line_param), column(column_param) {} 363 }; 364 365 // Data structure which is populated with the locations of each field 366 // value parsed from the text. 367 class LIBPROTOBUF_EXPORT ParseInfoTree { 368 public: 369 ParseInfoTree(); 370 ~ParseInfoTree(); 371 372 // Returns the parse location for index-th value of the field in the parsed 373 // text. If none exists, returns a location with line = -1. 
Index should be 374 // -1 for not-repeated fields. 375 ParseLocation GetLocation(const FieldDescriptor* field, int index) const; 376 377 // Returns the parse info tree for the given field, which must be a message 378 // type. The nested information tree is owned by the root tree and will be 379 // deleted when it is deleted. 380 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field, 381 int index) const; 382 383 private: 384 // Allow the text format parser to record information into the tree. 385 friend class TextFormat; 386 387 // Records the starting location of a single value for a field. 388 void RecordLocation(const FieldDescriptor* field, ParseLocation location); 389 390 // Create and records a nested tree for a nested message field. 391 ParseInfoTree* CreateNested(const FieldDescriptor* field); 392 393 // Defines the map from the index-th field descriptor to its parse location. 394 typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap; 395 396 // Defines the map from the index-th field descriptor to the nested parse 397 // info tree. 398 typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap; 399 400 LocationMap locations_; 401 NestedMap nested_; 402 403 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree); 404 }; 405 406 // For more control over parsing, use this class. 407 class LIBPROTOBUF_EXPORT Parser { 408 public: 409 Parser(); 410 ~Parser(); 411 412 // Like TextFormat::Parse(). 413 bool Parse(io::ZeroCopyInputStream* input, Message* output); 414 // Like TextFormat::ParseFromString(). 415 bool ParseFromString(const string& input, Message* output); 416 // Like TextFormat::Merge(). 417 bool Merge(io::ZeroCopyInputStream* input, Message* output); 418 // Like TextFormat::MergeFromString(). 419 bool MergeFromString(const string& input, Message* output); 420 421 // Set where to report parse errors. If NULL (the default), errors will 422 // be printed to stderr. 
423 void RecordErrorsTo(io::ErrorCollector* error_collector) { 424 error_collector_ = error_collector; 425 } 426 427 // Set how parser finds extensions. If NULL (the default), the 428 // parser will use the standard Reflection object associated with 429 // the message being parsed. 430 void SetFinder(Finder* finder) { 431 finder_ = finder; 432 } 433 434 // Sets where location information about the parse will be written. If NULL 435 // (the default), then no location will be written. 436 void WriteLocationsTo(ParseInfoTree* tree) { 437 parse_info_tree_ = tree; 438 } 439 440 // Normally parsing fails if, after parsing, output->IsInitialized() 441 // returns false. Call AllowPartialMessage(true) to skip this check. 442 void AllowPartialMessage(bool allow) { 443 allow_partial_ = allow; 444 } 445 446 // Allow field names to be matched case-insensitively. 447 // This is not advisable if there are fields that only differ in case, or 448 // if you want to enforce writing in the canonical form. 449 // This is 'false' by default. 450 void AllowCaseInsensitiveField(bool allow) { 451 allow_case_insensitive_field_ = allow; 452 } 453 454 // Like TextFormat::ParseFieldValueFromString 455 bool ParseFieldValueFromString(const string& input, 456 const FieldDescriptor* field, 457 Message* output); 458 459 460 void AllowFieldNumber(bool allow) { 461 allow_field_number_ = allow; 462 } 463 464 private: 465 // Forward declaration of an internal class used to parse text 466 // representations (see text_format.cc for implementation). 467 class ParserImpl; 468 469 // Like TextFormat::Merge(). The provided implementation is used 470 // to do the parsing. 
471 bool MergeUsingImpl(io::ZeroCopyInputStream* input, 472 Message* output, 473 ParserImpl* parser_impl); 474 475 io::ErrorCollector* error_collector_; 476 Finder* finder_; 477 ParseInfoTree* parse_info_tree_; 478 bool allow_partial_; 479 bool allow_case_insensitive_field_; 480 bool allow_unknown_field_; 481 bool allow_unknown_enum_; 482 bool allow_field_number_; 483 bool allow_relaxed_whitespace_; 484 bool allow_singular_overwrites_; 485 }; 486 487 488 private: 489 // Hack: ParseInfoTree declares TextFormat as a friend which should extend 490 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some 491 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide 492 // helpers for ParserImpl to call methods of ParseInfoTree. 493 static inline void RecordLocation(ParseInfoTree* info_tree, 494 const FieldDescriptor* field, 495 ParseLocation location); 496 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree, 497 const FieldDescriptor* field); 498 499 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat); 500 }; 501 502 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree, 503 const FieldDescriptor* field, 504 ParseLocation location) { 505 info_tree->RecordLocation(field, location); 506 } 507 508 509 inline TextFormat::ParseInfoTree* TextFormat::CreateNested( 510 ParseInfoTree* info_tree, const FieldDescriptor* field) { 511 return info_tree->CreateNested(field); 512 } 513 514 } // namespace protobuf 515 516 } // namespace google 517 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 518