Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // Utilities for printing and parsing protocol messages in a human-readable,
     36 // text-based format.
     37 
     38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     40 
     41 #include <map>
     42 #include <memory>
     43 #ifndef _SHARED_PTR_H
     44 #include <google/protobuf/stubs/shared_ptr.h>
     45 #endif
     46 #include <string>
     47 #include <vector>
     48 
     49 #include <google/protobuf/stubs/common.h>
     50 #include <google/protobuf/descriptor.h>
     51 #include <google/protobuf/message.h>
     52 
     53 namespace google {
     54 namespace protobuf {
     55 
     56 namespace io {
     57   class ErrorCollector;      // tokenizer.h
     58 }
     59 
     60 // This class implements protocol buffer text format.  Printing and parsing
     61 // protocol messages in text format is useful for debugging and human editing
     62 // of messages.
     63 //
     64 // This class is really a namespace that contains only static methods.
     65 class LIBPROTOBUF_EXPORT TextFormat {
     66  public:
     67   // Outputs a textual representation of the given message to the given
     68   // output stream.
     69   static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
     70 
     71   // Print the fields in an UnknownFieldSet.  They are printed by tag number
     72   // only.  Embedded messages are heuristically identified by attempting to
     73   // parse them.
     74   static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
     75                                  io::ZeroCopyOutputStream* output);
     76 
     77   // Like Print(), but outputs directly to a string.
     78   static bool PrintToString(const Message& message, string* output);
     79 
     80   // Like PrintUnknownFields(), but outputs directly to a string.
     81   static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
     82                                          string* output);
     83 
     84   // Outputs a textual representation of the value of the field supplied on
     85   // the message supplied. For non-repeated fields, an index of -1 must
     86   // be supplied. Note that this method will print the default value for a
     87   // field if it is not set.
     88   static void PrintFieldValueToString(const Message& message,
     89                                       const FieldDescriptor* field,
     90                                       int index,
     91                                       string* output);
     92 
     93   // The default printer that converts scalar values from fields into
     94   // their string representation.
     95   // You can derive from this FieldValuePrinter if you want to have
     96   // fields to be printed in a different way and register it at the
     97   // Printer.
     98   class LIBPROTOBUF_EXPORT FieldValuePrinter {
     99    public:
    100     FieldValuePrinter();
    101     virtual ~FieldValuePrinter();
    102     virtual string PrintBool(bool val) const;
    103     virtual string PrintInt32(int32 val) const;
    104     virtual string PrintUInt32(uint32 val) const;
    105     virtual string PrintInt64(int64 val) const;
    106     virtual string PrintUInt64(uint64 val) const;
    107     virtual string PrintFloat(float val) const;
    108     virtual string PrintDouble(double val) const;
    109     virtual string PrintString(const string& val) const;
    110     virtual string PrintBytes(const string& val) const;
    111     virtual string PrintEnum(int32 val, const string& name) const;
    112     virtual string PrintFieldName(const Message& message,
    113                                   const Reflection* reflection,
    114                                   const FieldDescriptor* field) const;
    115     virtual string PrintMessageStart(const Message& message,
    116                                      int field_index,
    117                                      int field_count,
    118                                      bool single_line_mode) const;
    119     virtual string PrintMessageEnd(const Message& message,
    120                                    int field_index,
    121                                    int field_count,
    122                                    bool single_line_mode) const;
    123 
    124    private:
    125     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
    126   };
    127 
    128   // Class for those users which require more fine-grained control over how
    129   // a protobuffer message is printed out.
    130   class LIBPROTOBUF_EXPORT Printer {
    131    public:
    132     Printer();
    133     ~Printer();
    134 
    135     // Like TextFormat::Print
    136     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
    137     // Like TextFormat::PrintUnknownFields
    138     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
    139                             io::ZeroCopyOutputStream* output) const;
    140     // Like TextFormat::PrintToString
    141     bool PrintToString(const Message& message, string* output) const;
    142     // Like TextFormat::PrintUnknownFieldsToString
    143     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
    144                                     string* output) const;
    145     // Like TextFormat::PrintFieldValueToString
    146     void PrintFieldValueToString(const Message& message,
    147                                  const FieldDescriptor* field,
    148                                  int index,
    149                                  string* output) const;
    150 
    151     // Adjust the initial indent level of all output.  Each indent level is
    152     // equal to two spaces.
    153     void SetInitialIndentLevel(int indent_level) {
    154       initial_indent_level_ = indent_level;
    155     }
    156 
    157     // If printing in single line mode, then the entire message will be output
    158     // on a single line with no line breaks.
    159     void SetSingleLineMode(bool single_line_mode) {
    160       single_line_mode_ = single_line_mode;
    161     }
    162 
    163     bool IsInSingleLineMode() {
    164       return single_line_mode_;
    165     }
    166 
    167     // If use_field_number is true, uses field number instead of field name.
    168     void SetUseFieldNumber(bool use_field_number) {
    169       use_field_number_ = use_field_number;
    170     }
    171 
    172     // Set true to print repeated primitives in a format like:
    173     //   field_name: [1, 2, 3, 4]
    174     // instead of printing each value on its own line.  Short format applies
    175     // only to primitive values -- i.e. everything except strings and
    176     // sub-messages/groups.
    177     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
    178       use_short_repeated_primitives_ = use_short_repeated_primitives;
    179     }
    180 
    181     // Set true to output UTF-8 instead of ASCII.  The only difference
    182     // is that bytes >= 0x80 in string fields will not be escaped,
    183     // because they are assumed to be part of UTF-8 multi-byte
    184     // sequences. This will change the default FieldValuePrinter.
    185     void SetUseUtf8StringEscaping(bool as_utf8);
    186 
    187     // Set the default FieldValuePrinter that is used for all fields that
    188     // don't have a field-specific printer registered.
    189     // Takes ownership of the printer.
    190     void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
    191 
    192     // Sets whether we want to hide unknown fields or not.
    193     // Usually unknown fields are printed in a generic way that includes the
    194     // tag number of the field instead of field name. However, sometimes it
    195     // is useful to be able to print the message without unknown fields (e.g.
    196     // for the python protobuf version to maintain consistency between its pure
    197     // python and c++ implementations).
    198     void SetHideUnknownFields(bool hide) {
    199       hide_unknown_fields_ = hide;
    200     }
    201 
    202     // If print_message_fields_in_index_order is true, print fields of a proto
    203     // message using the order defined in source code instead of the field
    204     // number. By default, use the field number order.
    205     void SetPrintMessageFieldsInIndexOrder(
    206         bool print_message_fields_in_index_order) {
    207       print_message_fields_in_index_order_ =
    208           print_message_fields_in_index_order;
    209     }
    210 
    211     // If expand==true, expand google.protobuf.Any payloads. The output
    212     // will be of form
    213     //    [type_url] { <value_printed_in_text> }
    214     //
    215     // If expand==false, print Any using the default printer. The output will
    216     // look like
    217     //    type_url: "<type_url>"  value: "serialized_content"
    218     void SetExpandAny(bool expand) {
    219       expand_any_ = expand;
    220     }
    221 
    222     // If non-zero, we truncate all string fields that are  longer than this
    223     // threshold.  This is useful when the proto message has very long strings,
    224     // e.g., dump of encoded image file.
    225     //
    226     // NOTE(hfgong):  Setting a non-zero value breaks round-trip safe
    227     // property of TextFormat::Printer.  That is, from the printed message, we
    228     // cannot fully recover the original string field any more.
    229     void SetTruncateStringFieldLongerThan(
    230         const int64 truncate_string_field_longer_than) {
    231       truncate_string_field_longer_than_ = truncate_string_field_longer_than;
    232     }
    233 
    234     // Register a custom field-specific FieldValuePrinter for fields
    235     // with a particular FieldDescriptor.
    236     // Returns "true" if the registration succeeded, or "false", if there is
    237     // already a printer for that FieldDescriptor.
    238     // Takes ownership of the printer on successful registration.
    239     bool RegisterFieldValuePrinter(const FieldDescriptor* field,
    240                                    const FieldValuePrinter* printer);
    241 
    242    private:
    243     // Forward declaration of an internal class used to print the text
    244     // output to the OutputStream (see text_format.cc for implementation).
    245     class TextGenerator;
    246 
    247     // Internal Print method, used for writing to the OutputStream via
    248     // the TextGenerator class.
    249     void Print(const Message& message,
    250                TextGenerator& generator) const;
    251 
    252     // Print a single field.
    253     void PrintField(const Message& message,
    254                     const Reflection* reflection,
    255                     const FieldDescriptor* field,
    256                     TextGenerator& generator) const;
    257 
    258     // Print a repeated primitive field in short form.
    259     void PrintShortRepeatedField(const Message& message,
    260                                  const Reflection* reflection,
    261                                  const FieldDescriptor* field,
    262                                  TextGenerator& generator) const;
    263 
    264     // Print the name of a field -- i.e. everything that comes before the
    265     // ':' for a single name/value pair.
    266     void PrintFieldName(const Message& message,
    267                         const Reflection* reflection,
    268                         const FieldDescriptor* field,
    269                         TextGenerator& generator) const;
    270 
    271     // Outputs a textual representation of the value of the field supplied on
    272     // the message supplied or the default value if not set.
    273     void PrintFieldValue(const Message& message,
    274                          const Reflection* reflection,
    275                          const FieldDescriptor* field,
    276                          int index,
    277                          TextGenerator& generator) const;
    278 
    279     // Print the fields in an UnknownFieldSet.  They are printed by tag number
    280     // only.  Embedded messages are heuristically identified by attempting to
    281     // parse them.
    282     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
    283                             TextGenerator& generator) const;
    284 
    285     bool PrintAny(const Message& message, TextGenerator& generator) const;
    286 
    287     int initial_indent_level_;
    288 
    289     bool single_line_mode_;
    290 
    291     bool use_field_number_;
    292 
    293     bool use_short_repeated_primitives_;
    294 
    295     bool hide_unknown_fields_;
    296 
    297     bool print_message_fields_in_index_order_;
    298 
    299     bool expand_any_;
    300 
    301     int64 truncate_string_field_longer_than_;
    302 
    303     google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
    304     typedef map<const FieldDescriptor*,
    305                 const FieldValuePrinter*> CustomPrinterMap;
    306     CustomPrinterMap custom_printers_;
    307   };
    308 
    309   // Parses a text-format protocol message from the given input stream to
    310   // the given message object. This function parses the human-readable format
    311   // written by Print(). Returns true on success. The message is cleared first,
    312   // even if the function fails -- See Merge() to avoid this behavior.
    313   //
    314   // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
    315   //
    316   // One use for this function is parsing handwritten strings in test code.
    317   // Another use is to parse the output from google::protobuf::Message::DebugString()
    318   // (or ShortDebugString()), because these functions output using
    319   // google::protobuf::TextFormat::Print().
    320   //
    321   // If you would like to read a protocol buffer serialized in the
    322   // (non-human-readable) binary wire format, see
    323   // google::protobuf::MessageLite::ParseFromString().
    324   static bool Parse(io::ZeroCopyInputStream* input, Message* output);
    325   // Like Parse(), but reads directly from a string.
    326   static bool ParseFromString(const string& input, Message* output);
    327 
    328   // Like Parse(), but the data is merged into the given message, as if
    329   // using Message::MergeFrom().
    330   static bool Merge(io::ZeroCopyInputStream* input, Message* output);
    331   // Like Merge(), but reads directly from a string.
    332   static bool MergeFromString(const string& input, Message* output);
    333 
    334   // Parse the given text as a single field value and store it into the
    335   // given field of the given message. If the field is a repeated field,
    336   // the new value will be added to the end
    337   static bool ParseFieldValueFromString(const string& input,
    338                                         const FieldDescriptor* field,
    339                                         Message* message);
    340 
    341   // Interface that TextFormat::Parser can use to find extensions.
    342   // This class may be extended in the future to find more information
    343   // like fields, etc.
    344   class LIBPROTOBUF_EXPORT Finder {
    345    public:
    346     virtual ~Finder();
    347 
    348     // Try to find an extension of *message by fully-qualified field
    349     // name.  Returns NULL if no extension is known for this name or number.
    350     virtual const FieldDescriptor* FindExtension(
    351         Message* message,
    352         const string& name) const = 0;
    353   };
    354 
    355   // A location in the parsed text.
    356   struct ParseLocation {
    357     int line;
    358     int column;
    359 
    360     ParseLocation() : line(-1), column(-1) {}
    361     ParseLocation(int line_param, int column_param)
    362         : line(line_param), column(column_param) {}
    363   };
    364 
    365   // Data structure which is populated with the locations of each field
    366   // value parsed from the text.
    367   class LIBPROTOBUF_EXPORT ParseInfoTree {
    368    public:
    369     ParseInfoTree();
    370     ~ParseInfoTree();
    371 
    372     // Returns the parse location for index-th value of the field in the parsed
    373     // text. If none exists, returns a location with line = -1. Index should be
    374     // -1 for not-repeated fields.
    375     ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
    376 
    377     // Returns the parse info tree for the given field, which must be a message
    378     // type. The nested information tree is owned by the root tree and will be
    379     // deleted when it is deleted.
    380     ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
    381                                     int index) const;
    382 
    383    private:
    384     // Allow the text format parser to record information into the tree.
    385     friend class TextFormat;
    386 
    387     // Records the starting location of a single value for a field.
    388     void RecordLocation(const FieldDescriptor* field, ParseLocation location);
    389 
    390     // Create and records a nested tree for a nested message field.
    391     ParseInfoTree* CreateNested(const FieldDescriptor* field);
    392 
    393     // Defines the map from the index-th field descriptor to its parse location.
    394     typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
    395 
    396     // Defines the map from the index-th field descriptor to the nested parse
    397     // info tree.
    398     typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
    399 
    400     LocationMap locations_;
    401     NestedMap nested_;
    402 
    403     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
    404   };
    405 
    406   // For more control over parsing, use this class.
    407   class LIBPROTOBUF_EXPORT Parser {
    408    public:
    409     Parser();
    410     ~Parser();
    411 
    412     // Like TextFormat::Parse().
    413     bool Parse(io::ZeroCopyInputStream* input, Message* output);
    414     // Like TextFormat::ParseFromString().
    415     bool ParseFromString(const string& input, Message* output);
    416     // Like TextFormat::Merge().
    417     bool Merge(io::ZeroCopyInputStream* input, Message* output);
    418     // Like TextFormat::MergeFromString().
    419     bool MergeFromString(const string& input, Message* output);
    420 
    421     // Set where to report parse errors.  If NULL (the default), errors will
    422     // be printed to stderr.
    423     void RecordErrorsTo(io::ErrorCollector* error_collector) {
    424       error_collector_ = error_collector;
    425     }
    426 
    427     // Set how parser finds extensions.  If NULL (the default), the
    428     // parser will use the standard Reflection object associated with
    429     // the message being parsed.
    430     void SetFinder(Finder* finder) {
    431       finder_ = finder;
    432     }
    433 
    434     // Sets where location information about the parse will be written. If NULL
    435     // (the default), then no location will be written.
    436     void WriteLocationsTo(ParseInfoTree* tree) {
    437       parse_info_tree_ = tree;
    438     }
    439 
    440     // Normally parsing fails if, after parsing, output->IsInitialized()
    441     // returns false.  Call AllowPartialMessage(true) to skip this check.
    442     void AllowPartialMessage(bool allow) {
    443       allow_partial_ = allow;
    444     }
    445 
    446     // Allow field names to be matched case-insensitively.
    447     // This is not advisable if there are fields that only differ in case, or
    448     // if you want to enforce writing in the canonical form.
    449     // This is 'false' by default.
    450     void AllowCaseInsensitiveField(bool allow) {
    451       allow_case_insensitive_field_ = allow;
    452     }
    453 
    454     // Like TextFormat::ParseFieldValueFromString
    455     bool ParseFieldValueFromString(const string& input,
    456                                    const FieldDescriptor* field,
    457                                    Message* output);
    458 
    459 
    460     void AllowFieldNumber(bool allow) {
    461       allow_field_number_ = allow;
    462     }
    463 
    464    private:
    465     // Forward declaration of an internal class used to parse text
    466     // representations (see text_format.cc for implementation).
    467     class ParserImpl;
    468 
    469     // Like TextFormat::Merge().  The provided implementation is used
    470     // to do the parsing.
    471     bool MergeUsingImpl(io::ZeroCopyInputStream* input,
    472                         Message* output,
    473                         ParserImpl* parser_impl);
    474 
    475     io::ErrorCollector* error_collector_;
    476     Finder* finder_;
    477     ParseInfoTree* parse_info_tree_;
    478     bool allow_partial_;
    479     bool allow_case_insensitive_field_;
    480     bool allow_unknown_field_;
    481     bool allow_unknown_enum_;
    482     bool allow_field_number_;
    483     bool allow_relaxed_whitespace_;
    484     bool allow_singular_overwrites_;
    485   };
    486 
    487 
    488  private:
    489   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
    490   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
    491   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
    492   // helpers for ParserImpl to call methods of ParseInfoTree.
    493   static inline void RecordLocation(ParseInfoTree* info_tree,
    494                                     const FieldDescriptor* field,
    495                                     ParseLocation location);
    496   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
    497                                             const FieldDescriptor* field);
    498 
    499   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
    500 };
    501 
    502 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
    503                                        const FieldDescriptor* field,
    504                                        ParseLocation location) {
    505   info_tree->RecordLocation(field, location);
    506 }
    507 
    508 
    509 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
    510     ParseInfoTree* info_tree, const FieldDescriptor* field) {
    511   return info_tree->CreateNested(field);
    512 }
    513 
    514 }  // namespace protobuf
    515 
    516 }  // namespace google
    517 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
    518