Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: jschorr (at) google.com (Joseph Schorr)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // Utilities for printing and parsing protocol messages in a human-readable,
     36 // text-based format.
     37 
     38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
     40 
     41 #include <map>
     42 #include <string>
     43 #include <vector>
     44 #include <google/protobuf/stubs/common.h>
     45 #include <google/protobuf/message.h>
     46 #include <google/protobuf/descriptor.h>
     47 
     48 namespace google {
     49 namespace protobuf {
     50 
     51 namespace io {
     52   class ErrorCollector;      // tokenizer.h
     53 }
     54 
     55 // This class implements protocol buffer text format.  Printing and parsing
     56 // protocol messages in text format is useful for debugging and human editing
     57 // of messages.
     58 //
     59 // This class is really a namespace that contains only static methods.
     60 class LIBPROTOBUF_EXPORT TextFormat {
     61  public:
     62   // Outputs a textual representation of the given message to the given
     63   // output stream.
     64   static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
     65 
     66   // Print the fields in an UnknownFieldSet.  They are printed by tag number
     67   // only.  Embedded messages are heuristically identified by attempting to
     68   // parse them.
     69   static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
     70                                  io::ZeroCopyOutputStream* output);
     71 
     72   // Like Print(), but outputs directly to a string.
     73   static bool PrintToString(const Message& message, string* output);
     74 
     75   // Like PrintUnknownFields(), but outputs directly to a string.
     76   static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
     77                                          string* output);
     78 
     79   // Outputs a textual representation of the value of the field supplied on
     80   // the message supplied. For non-repeated fields, an index of -1 must
     81   // be supplied. Note that this method will print the default value for a
     82   // field if it is not set.
     83   static void PrintFieldValueToString(const Message& message,
     84                                       const FieldDescriptor* field,
     85                                       int index,
     86                                       string* output);
     87 
     88   // Printer offers finer-grained control over how a protocol buffer message
     89   // is printed than the static TextFormat::Print*() helpers.  Configure an
          // instance with the Set*() methods below, then call one of its Print*()
          // methods.
     90   class LIBPROTOBUF_EXPORT Printer {
     91    public:
     92     Printer();
     93     ~Printer();
     94 
     95     // Like TextFormat::Print().
     96     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
     97     // Like TextFormat::PrintUnknownFields().
     98     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
     99                             io::ZeroCopyOutputStream* output) const;
    100     // Like TextFormat::PrintToString().
    101     bool PrintToString(const Message& message, string* output) const;
    102     // Like TextFormat::PrintUnknownFieldsToString().
    103     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
    104                                     string* output) const;
    105     // Like TextFormat::PrintFieldValueToString().  As with that method, pass
            // an index of -1 for non-repeated fields.
    106     void PrintFieldValueToString(const Message& message,
    107                                  const FieldDescriptor* field,
    108                                  int index,
    109                                  string* output) const;
    110 
    111     // Adjust the initial indent level of all output.  Each indent level is
    112     // equal to two spaces.  The value is stored as given; this setter does
            // no range checking.
    113     void SetInitialIndentLevel(int indent_level) {
    114       initial_indent_level_ = indent_level;
    115     }
    116 
    117     // If printing in single line mode, then the entire message will be output
    118     // on a single line with no line breaks.
    119     void SetSingleLineMode(bool single_line_mode) {
    120       single_line_mode_ = single_line_mode;
    121     }
    122 
    123     // Set true to print repeated primitives in a format like:
    124     //   field_name: [1, 2, 3, 4]
    125     // instead of printing each value on its own line.  Short format applies
    126     // only to primitive values -- i.e. everything except strings and
    127     // sub-messages/groups.
    128     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
    129       use_short_repeated_primitives_ = use_short_repeated_primitives;
    130     }
    131 
    132     // Set true to output UTF-8 instead of ASCII.  The only difference
    133     // is that bytes >= 0x80 in string fields will not be escaped,
    134     // because they are assumed to be part of UTF-8 multi-byte
    135     // sequences.
    136     void SetUseUtf8StringEscaping(bool as_utf8) {
    137       utf8_string_escaping_ = as_utf8;
    138     }
    139 
    140    private:
    141     // Forward declaration of an internal class used to print the text
    142     // output to the OutputStream (see text_format.cc for implementation).
    143     class TextGenerator;
    144 
    145     // Internal Print method, used for writing to the OutputStream via
    146     // the TextGenerator class.
    147     void Print(const Message& message,
    148                TextGenerator& generator) const;
    149 
    150     // Print a single field.
    151     void PrintField(const Message& message,
    152                     const Reflection* reflection,
    153                     const FieldDescriptor* field,
    154                     TextGenerator& generator) const;
    155 
    156     // Print a repeated primitive field in short form.
    157     void PrintShortRepeatedField(const Message& message,
    158                                  const Reflection* reflection,
    159                                  const FieldDescriptor* field,
    160                                  TextGenerator& generator) const;
    161 
    162     // Print the name of a field -- i.e. everything that comes before the
    163     // ':' for a single name/value pair.
    164     void PrintFieldName(const Message& message,
    165                         const Reflection* reflection,
    166                         const FieldDescriptor* field,
    167                         TextGenerator& generator) const;
    168 
    169     // Outputs a textual representation of the value of the field supplied on
    170     // the message supplied or the default value if not set.
    171     void PrintFieldValue(const Message& message,
    172                          const Reflection* reflection,
    173                          const FieldDescriptor* field,
    174                          int index,
    175                          TextGenerator& generator) const;
    176 
    177     // Print the fields in an UnknownFieldSet.  They are printed by tag number
    178     // only.  Embedded messages are heuristically identified by attempting to
    179     // parse them.
    180     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
    181                             TextGenerator& generator) const;
    182 
            // Set by SetInitialIndentLevel().
    183     int initial_indent_level_;
    184 
            // Set by SetSingleLineMode().
    185     bool single_line_mode_;
    186 
            // Set by SetUseShortRepeatedPrimitives().
    187     bool use_short_repeated_primitives_;
    188 
            // Set by SetUseUtf8StringEscaping().
    189     bool utf8_string_escaping_;
    190   };
    191 
    192   // Parses a text-format protocol message from the given input stream to
    193   // the given message object.  This function parses the format written
    194   // by Print().
    195   static bool Parse(io::ZeroCopyInputStream* input, Message* output);
    196   // Like Parse(), but reads directly from a string.
    197   static bool ParseFromString(const string& input, Message* output);
    198 
    199   // Like Parse(), but the data is merged into the given message, as if
    200   // using Message::MergeFrom().
    201   static bool Merge(io::ZeroCopyInputStream* input, Message* output);
    202   // Like Merge(), but reads directly from a string.
    203   static bool MergeFromString(const string& input, Message* output);
    204 
    205   // Parses the given text as a single field value and stores it into the
    206   // given field of the given message.  If the field is a repeated field,
    207   // the new value is appended to the end of that field.
    208   static bool ParseFieldValueFromString(const string& input,
    209                                         const FieldDescriptor* field,
    210                                         Message* message);
    211 
    212   // Interface that TextFormat::Parser can use to find extensions; install an
    213   // implementation with Parser::SetFinder().  This class may be extended in
    214   // the future to find more information like fields, etc.
    215   class LIBPROTOBUF_EXPORT Finder {
    216    public:
            // Virtual so that an implementation can be destroyed through a Finder
            // pointer.
    217     virtual ~Finder();
    218 
    219     // Try to find an extension of *message by fully-qualified field
    220     // name.  Returns NULL if no extension is known for this name.
            // Pure virtual: every implementation must provide it.
    221     virtual const FieldDescriptor* FindExtension(
    222         Message* message,
    223         const string& name) const = 0;
    224   };
    225 
    226   // A location in the parsed text, expressed as a line/column pair.
          // NOTE(review): whether line and column are 0- or 1-based is not
          // visible in this header -- confirm against text_format.cc.
    227   struct ParseLocation {
    228     int line;
    229     int column;
    230 
            // Default location is (-1, -1); ParseInfoTree::GetLocation() documents
            // line = -1 as its "no location exists" result.
    231     ParseLocation() : line(-1), column(-1) {}
            // Builds a location from an explicit line and column.
    232     ParseLocation(int line_param, int column_param)
    233         : line(line_param), column(column_param) {}
    234   };
    235 
    236   // Data structure which is populated with the locations of each field
    237   // value parsed from the text.  Pass an instance to
          // Parser::WriteLocationsTo() to have a parse fill it in.
    238   class LIBPROTOBUF_EXPORT ParseInfoTree {
    239    public:
    240     ParseInfoTree();
    241     ~ParseInfoTree();
    242 
    243     // Returns the parse location for index-th value of the field in the parsed
    244     // text. If none exists, returns a location with line = -1. Index should be
    245     // -1 for non-repeated fields.
    246     ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
    247 
    248     // Returns the parse info tree for the given field, which must be a message
    249     // type. The nested information tree is owned by the root tree and will be
    250     // deleted when it is deleted.
            // NOTE(review): the result when no nested tree exists for (field, index)
            // is not stated here -- confirm in text_format.cc before dereferencing.
    251     ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
    252                                     int index) const;
    253 
    254    private:
    255     // Allow the text format parser to record information into the tree.
    256     friend class TextFormat;
    257 
    258     // Records the starting location of a single value for a field.
    259     void RecordLocation(const FieldDescriptor* field, ParseLocation location);
    260 
    261     // Creates and records a nested tree for a nested message field.
    262     ParseInfoTree* CreateNested(const FieldDescriptor* field);
    263 
    264     // Maps a field descriptor to the parse locations recorded for that field.
            // Presumably element i is the location of the field's i-th value --
            // confirm in text_format.cc.
    265     typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
    266 
    267     // Maps a field descriptor to the nested parse info trees recorded for
    268     // that field.
    269     typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
    270 
    271     LocationMap locations_;
    272     NestedMap nested_;
    273 
            // Presumably disables copy construction and assignment -- see the macro
            // definition in stubs/common.h.
    274     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
    275   };
    276 
    277   // For more control over parsing, use this class: configure an instance
          // with the setters below, then call one of its Parse*()/Merge*() methods.
    278   class LIBPROTOBUF_EXPORT Parser {
    279    public:
    280     Parser();
    281     ~Parser();
    282 
    283     // Like TextFormat::Parse().
    284     bool Parse(io::ZeroCopyInputStream* input, Message* output);
    285     // Like TextFormat::ParseFromString().
    286     bool ParseFromString(const string& input, Message* output);
    287     // Like TextFormat::Merge().
    288     bool Merge(io::ZeroCopyInputStream* input, Message* output);
    289     // Like TextFormat::MergeFromString().
    290     bool MergeFromString(const string& input, Message* output);
    291 
    292     // Set where to report parse errors.  If NULL (the default), errors will
    293     // be printed to stderr.  Only the pointer is stored by this setter.
            // NOTE(review): ownership presumably stays with the caller -- confirm
            // against ~Parser() in text_format.cc.
    294     void RecordErrorsTo(io::ErrorCollector* error_collector) {
    295       error_collector_ = error_collector;
    296     }
    297 
    298     // Set how parser finds extensions.  If NULL (the default), the
    299     // parser will use the standard Reflection object associated with
    300     // the message being parsed.  Only the pointer is stored by this setter.
    301     void SetFinder(Finder* finder) {
    302       finder_ = finder;
    303     }
    304 
    305     // Sets where location information about the parse will be written. If NULL
    306     // (the default), then no location will be written.
    307     void WriteLocationsTo(ParseInfoTree* tree) {
    308       parse_info_tree_ = tree;
    309     }
    310 
    311     // Normally parsing fails if, after parsing, output->IsInitialized()
    312     // returns false.  Call AllowPartialMessage(true) to skip this check.
    313     void AllowPartialMessage(bool allow) {
    314       allow_partial_ = allow;
    315     }
    316 
    317     // Like TextFormat::ParseFieldValueFromString().
    318     bool ParseFieldValueFromString(const string& input,
    319                                    const FieldDescriptor* field,
    320                                    Message* output);
    321 
    322 
    323    private:
    324     // Forward declaration of an internal class used to parse text
    325     // representations (see text_format.cc for implementation).
    326     class ParserImpl;
    327 
    328     // Like TextFormat::Merge().  The provided implementation is used
    329     // to do the parsing.
    330     bool MergeUsingImpl(io::ZeroCopyInputStream* input,
    331                         Message* output,
    332                         ParserImpl* parser_impl);
    333 
            // Set by RecordErrorsTo().
    334     io::ErrorCollector* error_collector_;
            // Set by SetFinder().
    335     Finder* finder_;
            // Set by WriteLocationsTo().
    336     ParseInfoTree* parse_info_tree_;
            // Set by AllowPartialMessage().
    337     bool allow_partial_;
            // NOTE(review): no setter for this flag is declared in this header;
            // where it is written and read must be checked in text_format.cc.
    338     bool allow_unknown_field_;
    339   };
    340 
    341  private:
    342   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
    343   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
    344   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
    345   // helpers for ParserImpl to call methods of ParseInfoTree.
    346   static inline void RecordLocation(ParseInfoTree* info_tree,
    347                                     const FieldDescriptor* field,
    348                                     ParseLocation location);
    349   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
    350                                             const FieldDescriptor* field);
    351 
    352   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
    353 };
    354 
    355 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
    356                                        const FieldDescriptor* field,
    357                                        ParseLocation location) {
          // Forwards to the private ParseInfoTree::RecordLocation(), which is
          // reachable here because ParseInfoTree declares TextFormat as a friend.
          // info_tree is dereferenced unconditionally, so it must not be NULL.
    358   info_tree->RecordLocation(field, location);
    359 }
    360 
    361 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
    362     ParseInfoTree* info_tree, const FieldDescriptor* field) {
          // Forwards to the private ParseInfoTree::CreateNested() and returns its
          // result; reachable here because ParseInfoTree declares TextFormat as a
          // friend.  info_tree is dereferenced unconditionally, so it must not be
          // NULL.
    363   return info_tree->CreateNested(field);
    364 }
    365 
    366 }  // namespace protobuf
    367 
    368 }  // namespace google
    369 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
    370