1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr (at) google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // Utilities for printing and parsing protocol messages in a human-readable, 36 // text-based format. 37 38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 40 41 #include <map> 42 #include <string> 43 #include <vector> 44 #include <google/protobuf/stubs/common.h> 45 #include <google/protobuf/message.h> 46 #include <google/protobuf/descriptor.h> 47 48 namespace google { 49 namespace protobuf { 50 51 namespace io { 52 class ErrorCollector; // tokenizer.h 53 } 54 55 // This class implements protocol buffer text format. Printing and parsing 56 // protocol messages in text format is useful for debugging and human editing 57 // of messages. 58 // 59 // This class is really a namespace that contains only static methods. 60 class LIBPROTOBUF_EXPORT TextFormat { 61 public: 62 // Outputs a textual representation of the given message to the given 63 // output stream. 64 static bool Print(const Message& message, io::ZeroCopyOutputStream* output); 65 66 // Print the fields in an UnknownFieldSet. They are printed by tag number 67 // only. Embedded messages are heuristically identified by attempting to 68 // parse them. 69 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 70 io::ZeroCopyOutputStream* output); 71 72 // Like Print(), but outputs directly to a string. 73 static bool PrintToString(const Message& message, string* output); 74 75 // Like PrintUnknownFields(), but outputs directly to a string. 76 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 77 string* output); 78 79 // Outputs a textual representation of the value of the field supplied on 80 // the message supplied. For non-repeated fields, an index of -1 must 81 // be supplied. Note that this method will print the default value for a 82 // field if it is not set. 83 static void PrintFieldValueToString(const Message& message, 84 const FieldDescriptor* field, 85 int index, 86 string* output); 87 88 // Class for those users which require more fine-grained control over how 89 // a protobuffer message is printed out. 90 class LIBPROTOBUF_EXPORT Printer { 91 public: 92 Printer(); 93 ~Printer(); 94 95 // Like TextFormat::Print 96 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const; 97 // Like TextFormat::PrintUnknownFields 98 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, 99 io::ZeroCopyOutputStream* output) const; 100 // Like TextFormat::PrintToString 101 bool PrintToString(const Message& message, string* output) const; 102 // Like TextFormat::PrintUnknownFieldsToString 103 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, 104 string* output) const; 105 // Like TextFormat::PrintFieldValueToString 106 void PrintFieldValueToString(const Message& message, 107 const FieldDescriptor* field, 108 int index, 109 string* output) const; 110 111 // Adjust the initial indent level of all output. Each indent level is 112 // equal to two spaces. 113 void SetInitialIndentLevel(int indent_level) { 114 initial_indent_level_ = indent_level; 115 } 116 117 // If printing in single line mode, then the entire message will be output 118 // on a single line with no line breaks. 119 void SetSingleLineMode(bool single_line_mode) { 120 single_line_mode_ = single_line_mode; 121 } 122 123 // Set true to print repeated primitives in a format like: 124 // field_name: [1, 2, 3, 4] 125 // instead of printing each value on its own line. Short format applies 126 // only to primitive values -- i.e. everything except strings and 127 // sub-messages/groups. 128 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) { 129 use_short_repeated_primitives_ = use_short_repeated_primitives; 130 } 131 132 // Set true to output UTF-8 instead of ASCII. The only difference 133 // is that bytes >= 0x80 in string fields will not be escaped, 134 // because they are assumed to be part of UTF-8 multi-byte 135 // sequences. 136 void SetUseUtf8StringEscaping(bool as_utf8) { 137 utf8_string_escaping_ = as_utf8; 138 } 139 140 private: 141 // Forward declaration of an internal class used to print the text 142 // output to the OutputStream (see text_format.cc for implementation). 143 class TextGenerator; 144 145 // Internal Print method, used for writing to the OutputStream via 146 // the TextGenerator class. 147 void Print(const Message& message, 148 TextGenerator& generator) const; 149 150 // Print a single field. 151 void PrintField(const Message& message, 152 const Reflection* reflection, 153 const FieldDescriptor* field, 154 TextGenerator& generator) const; 155 156 // Print a repeated primitive field in short form. 157 void PrintShortRepeatedField(const Message& message, 158 const Reflection* reflection, 159 const FieldDescriptor* field, 160 TextGenerator& generator) const; 161 162 // Print the name of a field -- i.e. everything that comes before the 163 // ':' for a single name/value pair. 164 void PrintFieldName(const Message& message, 165 const Reflection* reflection, 166 const FieldDescriptor* field, 167 TextGenerator& generator) const; 168 169 // Outputs a textual representation of the value of the field supplied on 170 // the message supplied or the default value if not set. 171 void PrintFieldValue(const Message& message, 172 const Reflection* reflection, 173 const FieldDescriptor* field, 174 int index, 175 TextGenerator& generator) const; 176 177 // Print the fields in an UnknownFieldSet. They are printed by tag number 178 // only. Embedded messages are heuristically identified by attempting to 179 // parse them. 180 void PrintUnknownFields(const UnknownFieldSet& unknown_fields, 181 TextGenerator& generator) const; 182 183 int initial_indent_level_; 184 185 bool single_line_mode_; 186 187 bool use_short_repeated_primitives_; 188 189 bool utf8_string_escaping_; 190 }; 191 192 // Parses a text-format protocol message from the given input stream to 193 // the given message object. This function parses the format written 194 // by Print(). 195 static bool Parse(io::ZeroCopyInputStream* input, Message* output); 196 // Like Parse(), but reads directly from a string. 197 static bool ParseFromString(const string& input, Message* output); 198 199 // Like Parse(), but the data is merged into the given message, as if 200 // using Message::MergeFrom(). 201 static bool Merge(io::ZeroCopyInputStream* input, Message* output); 202 // Like Merge(), but reads directly from a string. 203 static bool MergeFromString(const string& input, Message* output); 204 205 // Parse the given text as a single field value and store it into the 206 // given field of the given message. If the field is a repeated field, 207 // the new value will be added to the end 208 static bool ParseFieldValueFromString(const string& input, 209 const FieldDescriptor* field, 210 Message* message); 211 212 // Interface that TextFormat::Parser can use to find extensions. 213 // This class may be extended in the future to find more information 214 // like fields, etc. 215 class LIBPROTOBUF_EXPORT Finder { 216 public: 217 virtual ~Finder(); 218 219 // Try to find an extension of *message by fully-qualified field 220 // name. Returns NULL if no extension is known for this name or number. 221 virtual const FieldDescriptor* FindExtension( 222 Message* message, 223 const string& name) const = 0; 224 }; 225 226 // A location in the parsed text. 227 struct ParseLocation { 228 int line; 229 int column; 230 231 ParseLocation() : line(-1), column(-1) {} 232 ParseLocation(int line_param, int column_param) 233 : line(line_param), column(column_param) {} 234 }; 235 236 // Data structure which is populated with the locations of each field 237 // value parsed from the text. 238 class LIBPROTOBUF_EXPORT ParseInfoTree { 239 public: 240 ParseInfoTree(); 241 ~ParseInfoTree(); 242 243 // Returns the parse location for index-th value of the field in the parsed 244 // text. If none exists, returns a location with line = -1. Index should be 245 // -1 for not-repeated fields. 246 ParseLocation GetLocation(const FieldDescriptor* field, int index) const; 247 248 // Returns the parse info tree for the given field, which must be a message 249 // type. The nested information tree is owned by the root tree and will be 250 // deleted when it is deleted. 251 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field, 252 int index) const; 253 254 private: 255 // Allow the text format parser to record information into the tree. 256 friend class TextFormat; 257 258 // Records the starting location of a single value for a field. 259 void RecordLocation(const FieldDescriptor* field, ParseLocation location); 260 261 // Create and records a nested tree for a nested message field. 262 ParseInfoTree* CreateNested(const FieldDescriptor* field); 263 264 // Defines the map from the index-th field descriptor to its parse location. 265 typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap; 266 267 // Defines the map from the index-th field descriptor to the nested parse 268 // info tree. 269 typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap; 270 271 LocationMap locations_; 272 NestedMap nested_; 273 274 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree); 275 }; 276 277 // For more control over parsing, use this class. 278 class LIBPROTOBUF_EXPORT Parser { 279 public: 280 Parser(); 281 ~Parser(); 282 283 // Like TextFormat::Parse(). 284 bool Parse(io::ZeroCopyInputStream* input, Message* output); 285 // Like TextFormat::ParseFromString(). 286 bool ParseFromString(const string& input, Message* output); 287 // Like TextFormat::Merge(). 288 bool Merge(io::ZeroCopyInputStream* input, Message* output); 289 // Like TextFormat::MergeFromString(). 290 bool MergeFromString(const string& input, Message* output); 291 292 // Set where to report parse errors. If NULL (the default), errors will 293 // be printed to stderr. 294 void RecordErrorsTo(io::ErrorCollector* error_collector) { 295 error_collector_ = error_collector; 296 } 297 298 // Set how parser finds extensions. If NULL (the default), the 299 // parser will use the standard Reflection object associated with 300 // the message being parsed. 301 void SetFinder(Finder* finder) { 302 finder_ = finder; 303 } 304 305 // Sets where location information about the parse will be written. If NULL 306 // (the default), then no location will be written. 307 void WriteLocationsTo(ParseInfoTree* tree) { 308 parse_info_tree_ = tree; 309 } 310 311 // Normally parsing fails if, after parsing, output->IsInitialized() 312 // returns false. Call AllowPartialMessage(true) to skip this check. 313 void AllowPartialMessage(bool allow) { 314 allow_partial_ = allow; 315 } 316 317 // Like TextFormat::ParseFieldValueFromString 318 bool ParseFieldValueFromString(const string& input, 319 const FieldDescriptor* field, 320 Message* output); 321 322 323 private: 324 // Forward declaration of an internal class used to parse text 325 // representations (see text_format.cc for implementation). 326 class ParserImpl; 327 328 // Like TextFormat::Merge(). The provided implementation is used 329 // to do the parsing. 330 bool MergeUsingImpl(io::ZeroCopyInputStream* input, 331 Message* output, 332 ParserImpl* parser_impl); 333 334 io::ErrorCollector* error_collector_; 335 Finder* finder_; 336 ParseInfoTree* parse_info_tree_; 337 bool allow_partial_; 338 bool allow_unknown_field_; 339 }; 340 341 private: 342 // Hack: ParseInfoTree declares TextFormat as a friend which should extend 343 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some 344 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide 345 // helpers for ParserImpl to call methods of ParseInfoTree. 346 static inline void RecordLocation(ParseInfoTree* info_tree, 347 const FieldDescriptor* field, 348 ParseLocation location); 349 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree, 350 const FieldDescriptor* field); 351 352 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat); 353 }; 354 355 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree, 356 const FieldDescriptor* field, 357 ParseLocation location) { 358 info_tree->RecordLocation(field, location); 359 } 360 361 inline TextFormat::ParseInfoTree* TextFormat::CreateNested( 362 ParseInfoTree* info_tree, const FieldDescriptor* field) { 363 return info_tree->CreateNested(field); 364 } 365 366 } // namespace protobuf 367 368 } // namespace google 369 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__ 370