Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // Utilities for printing and parsing protocol messages in a human-readable,
     36 // text-based format.
     37 
     38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     40 
     41 #include <map>
     42 #include <memory>
     43 #include <string>
     44 #include <vector>
     45 
     46 #include <google/protobuf/stubs/common.h>
     47 #include <google/protobuf/descriptor.h>
     48 #include <google/protobuf/message.h>
     49 
     50 namespace google {
     51 namespace protobuf {
     52 
     53 namespace io {
     54   class ErrorCollector;      // tokenizer.h
     55 }
     56 
     57 // This class implements protocol buffer text format.  Printing and parsing
     58 // protocol messages in text format is useful for debugging and human editing
     59 // of messages.
     60 //
     61 // This class is really a namespace that contains only static methods.
     62 class LIBPROTOBUF_EXPORT TextFormat {
     63  public:
  // Outputs a textual representation of the given message to the given
  // output stream.  For more control over the output format, use the
  // Printer class declared below; it offers a member with the same name.
  static bool Print(const Message& message, io::ZeroCopyOutputStream* output);

  // Print the fields in an UnknownFieldSet.  They are printed by tag number
  // only.  Embedded messages are heuristically identified by attempting to
  // parse them.
  static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
                                 io::ZeroCopyOutputStream* output);

  // Like Print(), but outputs directly to a string.
  // |output| is the string that receives the text.
  static bool PrintToString(const Message& message, string* output);

  // Like PrintUnknownFields(), but outputs directly to a string.
  // |output| is the string that receives the text.
  static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
                                         string* output);

  // Outputs a textual representation of the value of the field supplied on
  // the message supplied. For non-repeated fields, an index of -1 must
  // be supplied. Note that this method will print the default value for a
  // field if it is not set.
  //
  //   message: the message the value is read from.
  //   field:   descriptor of the field whose value is printed.
  //   index:   position of the value within a repeated field, or -1 for a
  //            non-repeated field.
  //   output:  string that receives the text.
  static void PrintFieldValueToString(const Message& message,
                                      const FieldDescriptor* field,
                                      int index,
                                      string* output);
     89 
  // The default printer that converts scalar values from fields into
  // their string representation.
  // You can derive from this FieldValuePrinter if you want to have
  // fields to be printed in a different way and register it at the
  // Printer.
  //
  // Every hook below is virtual and const, and returns its result as a
  // string.
  class LIBPROTOBUF_EXPORT FieldValuePrinter {
   public:
    FieldValuePrinter();
    virtual ~FieldValuePrinter();
    // One hook per scalar type; |val| is the value to convert.
    virtual string PrintBool(bool val) const;
    virtual string PrintInt32(int32 val) const;
    virtual string PrintUInt32(uint32 val) const;
    virtual string PrintInt64(int64 val) const;
    virtual string PrintUInt64(uint64 val) const;
    virtual string PrintFloat(float val) const;
    virtual string PrintDouble(double val) const;
    // String and bytes values have separate hooks even though both are
    // passed as a string.
    virtual string PrintString(const string& val) const;
    virtual string PrintBytes(const string& val) const;
    // Receives both the numeric value and the name of the enum value.
    virtual string PrintEnum(int32 val, const string& name) const;
    // Hook for the name of |field|, a field of |message|.
    virtual string PrintFieldName(const Message& message,
                                  const Reflection* reflection,
                                  const FieldDescriptor* field) const;
    // Hooks for the text emitted at the start and at the end of a message.
    // |single_line_mode| reports whether single line output is in effect.
    // NOTE(review): the exact meaning of |field_index| and |field_count| is
    // not visible in this header -- confirm against text_format.cc.
    virtual string PrintMessageStart(const Message& message,
                                     int field_index,
                                     int field_count,
                                     bool single_line_mode) const;
    virtual string PrintMessageEnd(const Message& message,
                                   int field_index,
                                   int field_count,
                                   bool single_line_mode) const;

   private:
    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
  };
    124 
    125   // Class for those users which require more fine-grained control over how
    126   // a protobuffer message is printed out.
    127   class LIBPROTOBUF_EXPORT Printer {
    128    public:
    129     Printer();
    130     ~Printer();
    131 
    132     // Like TextFormat::Print
    133     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
    134     // Like TextFormat::PrintUnknownFields
    135     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
    136                             io::ZeroCopyOutputStream* output) const;
    137     // Like TextFormat::PrintToString
    138     bool PrintToString(const Message& message, string* output) const;
    139     // Like TextFormat::PrintUnknownFieldsToString
    140     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
    141                                     string* output) const;
    142     // Like TextFormat::PrintFieldValueToString
    143     void PrintFieldValueToString(const Message& message,
    144                                  const FieldDescriptor* field,
    145                                  int index,
    146                                  string* output) const;
    147 
    148     // Adjust the initial indent level of all output.  Each indent level is
    149     // equal to two spaces.
    150     void SetInitialIndentLevel(int indent_level) {
    151       initial_indent_level_ = indent_level;
    152     }
    153 
    154     // If printing in single line mode, then the entire message will be output
    155     // on a single line with no line breaks.
    156     void SetSingleLineMode(bool single_line_mode) {
    157       single_line_mode_ = single_line_mode;
    158     }
    159 
    160     bool IsInSingleLineMode() {
    161       return single_line_mode_;
    162     }
    163 
    164     // If use_field_number is true, uses field number instead of field name.
    165     void SetUseFieldNumber(bool use_field_number) {
    166       use_field_number_ = use_field_number;
    167     }
    168 
    169     // Set true to print repeated primitives in a format like:
    170     //   field_name: [1, 2, 3, 4]
    171     // instead of printing each value on its own line.  Short format applies
    172     // only to primitive values -- i.e. everything except strings and
    173     // sub-messages/groups.
    174     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
    175       use_short_repeated_primitives_ = use_short_repeated_primitives;
    176     }
    177 
    178     // Set true to output UTF-8 instead of ASCII.  The only difference
    179     // is that bytes >= 0x80 in string fields will not be escaped,
    180     // because they are assumed to be part of UTF-8 multi-byte
    181     // sequences. This will change the default FieldValuePrinter.
    182     void SetUseUtf8StringEscaping(bool as_utf8);
    183 
    184     // Set the default FieldValuePrinter that is used for all fields that
    185     // don't have a field-specific printer registered.
    186     // Takes ownership of the printer.
    187     void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
    188 
    189     // Sets whether we want to hide unknown fields or not.
    190     // Usually unknown fields are printed in a generic way that includes the
    191     // tag number of the field instead of field name. However, sometimes it
    192     // is useful to be able to print the message without unknown fields (e.g.
    193     // for the python protobuf version to maintain consistency between its pure
    194     // python and c++ implementations).
    195     void SetHideUnknownFields(bool hide) {
    196       hide_unknown_fields_ = hide;
    197     }
    198 
    199     // If print_message_fields_in_index_order is true, print fields of a proto
    200     // message using the order defined in source code instead of the field
    201     // number. By default, use the field number order.
    202     void SetPrintMessageFieldsInIndexOrder(
    203         bool print_message_fields_in_index_order) {
    204       print_message_fields_in_index_order_ =
    205           print_message_fields_in_index_order;
    206     }
    207 
    208     // Register a custom field-specific FieldValuePrinter for fields
    209     // with a particular FieldDescriptor.
    210     // Returns "true" if the registration succeeded, or "false", if there is
    211     // already a printer for that FieldDescriptor.
    212     // Takes ownership of the printer on successful registration.
    213     bool RegisterFieldValuePrinter(const FieldDescriptor* field,
    214                                    const FieldValuePrinter* printer);
    215 
    216    private:
    217     // Forward declaration of an internal class used to print the text
    218     // output to the OutputStream (see text_format.cc for implementation).
    219     class TextGenerator;
    220 
    221     // Internal Print method, used for writing to the OutputStream via
    222     // the TextGenerator class.
    223     void Print(const Message& message,
    224                TextGenerator& generator) const;
    225 
    226     // Print a single field.
    227     void PrintField(const Message& message,
    228                     const Reflection* reflection,
    229                     const FieldDescriptor* field,
    230                     TextGenerator& generator) const;
    231 
    232     // Print a repeated primitive field in short form.
    233     void PrintShortRepeatedField(const Message& message,
    234                                  const Reflection* reflection,
    235                                  const FieldDescriptor* field,
    236                                  TextGenerator& generator) const;
    237 
    238     // Print the name of a field -- i.e. everything that comes before the
    239     // ':' for a single name/value pair.
    240     void PrintFieldName(const Message& message,
    241                         const Reflection* reflection,
    242                         const FieldDescriptor* field,
    243                         TextGenerator& generator) const;
    244 
    245     // Outputs a textual representation of the value of the field supplied on
    246     // the message supplied or the default value if not set.
    247     void PrintFieldValue(const Message& message,
    248                          const Reflection* reflection,
    249                          const FieldDescriptor* field,
    250                          int index,
    251                          TextGenerator& generator) const;
    252 
    253     // Print the fields in an UnknownFieldSet.  They are printed by tag number
    254     // only.  Embedded messages are heuristically identified by attempting to
    255     // parse them.
    256     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
    257                             TextGenerator& generator) const;
    258 
    259     int initial_indent_level_;
    260 
    261     bool single_line_mode_;
    262 
    263     bool use_field_number_;
    264 
    265     bool use_short_repeated_primitives_;
    266 
    267     bool hide_unknown_fields_;
    268 
    269     bool print_message_fields_in_index_order_;
    270 
    271     scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
    272     typedef map<const FieldDescriptor*,
    273                 const FieldValuePrinter*> CustomPrinterMap;
    274     CustomPrinterMap custom_printers_;
    275   };
    276 
  // Parses a text-format protocol message from the given input stream to
  // the given message object.  This function parses the format written
  // by Print().  For more control over parsing, use the Parser class
  // declared below.
  static bool Parse(io::ZeroCopyInputStream* input, Message* output);
  // Like Parse(), but reads directly from a string.
  static bool ParseFromString(const string& input, Message* output);

  // Like Parse(), but the data is merged into the given message, as if
  // using Message::MergeFrom().
  static bool Merge(io::ZeroCopyInputStream* input, Message* output);
  // Like Merge(), but reads directly from a string.
  static bool MergeFromString(const string& input, Message* output);

  // Parses the given text as a single field value and stores it into the
  // given field of the given message.  If the field is a repeated field,
  // the new value will be added to the end of that field.
  static bool ParseFieldValueFromString(const string& input,
                                        const FieldDescriptor* field,
                                        Message* message);
    296 
  // Interface that TextFormat::Parser can use to find extensions.
  // This class may be extended in the future to find more information
  // like fields, etc.
  //
  // Install an implementation on a parser with Parser::SetFinder().
  class LIBPROTOBUF_EXPORT Finder {
   public:
    virtual ~Finder();

    // Try to find an extension of *message by fully-qualified field
    // name.  Returns NULL if no extension is known for this name or number.
    //
    //   message: the message whose extension is being looked up.
    //   name:    the fully-qualified name of the extension field.
    //
    // Pure virtual: every concrete Finder must implement it.
    virtual const FieldDescriptor* FindExtension(
        Message* message,
        const string& name) const = 0;
  };
    310 
    311   // A location in the parsed text.
    312   struct ParseLocation {
    313     int line;
    314     int column;
    315 
    316     ParseLocation() : line(-1), column(-1) {}
    317     ParseLocation(int line_param, int column_param)
    318         : line(line_param), column(column_param) {}
    319   };
    320 
  // Data structure which is populated with the locations of each field
  // value parsed from the text.  Pass an instance to
  // Parser::WriteLocationsTo() to have a parse fill it in.
  class LIBPROTOBUF_EXPORT ParseInfoTree {
   public:
    ParseInfoTree();
    ~ParseInfoTree();

    // Returns the parse location for index-th value of the field in the parsed
    // text. If none exists, returns a location with line = -1. Index should be
    // -1 for non-repeated fields.
    ParseLocation GetLocation(const FieldDescriptor* field, int index) const;

    // Returns the parse info tree for the given field, which must be a message
    // type. The nested information tree is owned by the root tree and will be
    // deleted when it is deleted.
    ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
                                    int index) const;

   private:
    // Allow the text format parser to record information into the tree.
    friend class TextFormat;

    // Records the starting location of a single value for a field.
    void RecordLocation(const FieldDescriptor* field, ParseLocation location);

    // Creates and records a nested tree for a nested message field.
    ParseInfoTree* CreateNested(const FieldDescriptor* field);

    // Maps a field descriptor to the parse locations recorded for that
    // field, one vector entry per recorded value.
    typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;

    // Maps a field descriptor to the nested parse info trees created for
    // that field, one vector entry per nested tree.
    typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;

    LocationMap locations_;
    NestedMap nested_;

    GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
  };
    361 
  // For more control over parsing, use this class.
  //
  // Configure an instance with the setters below, then call one of the
  // Parse*() / Merge*() methods.
  class LIBPROTOBUF_EXPORT Parser {
   public:
    Parser();
    ~Parser();

    // Like TextFormat::Parse().
    bool Parse(io::ZeroCopyInputStream* input, Message* output);
    // Like TextFormat::ParseFromString().
    bool ParseFromString(const string& input, Message* output);
    // Like TextFormat::Merge().
    bool Merge(io::ZeroCopyInputStream* input, Message* output);
    // Like TextFormat::MergeFromString().
    bool MergeFromString(const string& input, Message* output);

    // Set where to report parse errors.  If NULL (the default), errors will
    // be printed to stderr.
    void RecordErrorsTo(io::ErrorCollector* error_collector) {
      error_collector_ = error_collector;
    }

    // Set how parser finds extensions.  If NULL (the default), the
    // parser will use the standard Reflection object associated with
    // the message being parsed.
    void SetFinder(Finder* finder) {
      finder_ = finder;
    }

    // Sets where location information about the parse will be written. If NULL
    // (the default), then no location will be written.
    void WriteLocationsTo(ParseInfoTree* tree) {
      parse_info_tree_ = tree;
    }

    // Normally parsing fails if, after parsing, output->IsInitialized()
    // returns false.  Call AllowPartialMessage(true) to skip this check.
    void AllowPartialMessage(bool allow) {
      allow_partial_ = allow;
    }

    // Allow field names to be matched case-insensitively.
    // This is not advisable if there are fields that only differ in case, or
    // if you want to enforce writing in the canonical form.
    // This is 'false' by default.
    void AllowCaseInsensitiveField(bool allow) {
      allow_case_insensitive_field_ = allow;
    }

    // Like TextFormat::ParseFieldValueFromString
    bool ParseFieldValueFromString(const string& input,
                                   const FieldDescriptor* field,
                                   Message* output);

    // Stores |allow| in allow_field_number_.
    // NOTE(review): presumably the parsing counterpart of
    // Printer::SetUseFieldNumber(), i.e. lets the input identify fields by
    // number instead of by name -- confirm against text_format.cc.
    void AllowFieldNumber(bool allow) {
      allow_field_number_ = allow;
    }

   private:
    // Forward declaration of an internal class used to parse text
    // representations (see text_format.cc for implementation).
    class ParserImpl;

    // Like TextFormat::Merge().  The provided implementation is used
    // to do the parsing.
    bool MergeUsingImpl(io::ZeroCopyInputStream* input,
                        Message* output,
                        ParserImpl* parser_impl);

    // Set by RecordErrorsTo().
    io::ErrorCollector* error_collector_;
    // Set by SetFinder().
    Finder* finder_;
    // Set by WriteLocationsTo().
    ParseInfoTree* parse_info_tree_;
    // Set by AllowPartialMessage().
    bool allow_partial_;
    // Set by AllowCaseInsensitiveField().
    bool allow_case_insensitive_field_;
    // No public setter for this flag is declared in this header.
    bool allow_unknown_field_;
    // No public setter for this flag is declared in this header.
    bool allow_unknown_enum_;
    // Set by AllowFieldNumber().
    bool allow_field_number_;
    // No public setter for this flag is declared in this header.
    bool allow_relaxed_whitespace_;
    // No public setter for this flag is declared in this header.
    bool allow_singular_overwrites_;
  };
    442 
    443 
    444  private:
    445   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
    446   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
    447   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
    448   // helpers for ParserImpl to call methods of ParseInfoTree.
    449   static inline void RecordLocation(ParseInfoTree* info_tree,
    450                                     const FieldDescriptor* field,
    451                                     ParseLocation location);
    452   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
    453                                             const FieldDescriptor* field);
    454 
    455   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
    456 };
    457 
    458 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
    459                                        const FieldDescriptor* field,
    460                                        ParseLocation location) {
    461   info_tree->RecordLocation(field, location);
    462 }
    463 
    464 
    465 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
    466     ParseInfoTree* info_tree, const FieldDescriptor* field) {
    467   return info_tree->CreateNested(field);
    468 }
    469 
    470 }  // namespace protobuf
    471 
    472 }  // namespace google
    473 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
    474