Home | History | Annotate | Download | only in internal
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 #ifndef GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
     32 #define GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
     33 
     34 #include <stack>
     35 #include <string>
     36 
     37 #include <google/protobuf/stubs/common.h>
     38 #include <google/protobuf/stubs/stringpiece.h>
     39 #include <google/protobuf/stubs/status.h>
     40 
     41 namespace google {
     42 namespace util {
     43 class Status;
     44 }  // namespace util
     45 
     46 namespace protobuf {
     47 namespace util {
     48 namespace converter {
     49 
     50 class ObjectWriter;
     51 
     52 // A JSON parser that can parse a stream of JSON chunks rather than needing the
     53 // entire JSON string up front. It is a modified version of the parser in
     54 // //net/proto/json/json-parser.h that has been changed in the following ways:
     55 // - Changed from recursion to an explicit stack to allow resumption
     56 // - Added support for int64 and uint64 numbers
     57 // - Removed support for octal and decimal escapes
     58 // - Removed support for numeric keys
     59 // - Removed support for functions (javascript)
     60 // - Removed some lax-comma support (but kept trailing comma support)
     61 // - Writes directly to an ObjectWriter rather than using subclassing
     62 //
     63 // Example usage (feed chunks in order, then finish):
     64 // JsonStreamParser parser(ow_.get());
     65 // util::Status result = parser.Parse(chunk1);
     66 // result.Update(parser.Parse(chunk2));
     67 // result.Update(parser.FinishParse());
     68 // GOOGLE_DCHECK(result.ok()) << "Failed to parse JSON";
     69 //
     70 // This parser is thread-compatible as long as only one thread is calling a
     71 // Parse() method at a time.
     72 class LIBPROTOBUF_EXPORT JsonStreamParser {
     73  public:
     74   // Creates a parser that writes its parse events to the given ObjectWriter.
     75   explicit JsonStreamParser(ObjectWriter* ow);
     76   virtual ~JsonStreamParser();
     77 
     78   // Parses the next chunk of a UTF-8 encoded JSON stream (see usage above).
     79   util::Status Parse(StringPiece json);
     80 
     81   // Finishes parsing the JSON string. Per the usage example above, call this
     82   // after the last chunk has been passed to Parse().
     83   util::Status FinishParse();
     84 
     85 
     86  private:
     87   enum TokenType {  // Kind of token at p_, as returned by GetNextTokenType().
     88     BEGIN_STRING,     // " or '
     89     BEGIN_NUMBER,     // - or digit
     90     BEGIN_TRUE,       // true
     91     BEGIN_FALSE,      // false
     92     BEGIN_NULL,       // null
     93     BEGIN_OBJECT,     // {
     94     END_OBJECT,       // }
     95     BEGIN_ARRAY,      // [
     96     END_ARRAY,        // ]
     97     ENTRY_SEPARATOR,  // :
     98     VALUE_SEPARATOR,  // ,
     99     BEGIN_KEY,        // letter, _, $ or digit.  Must begin with non-digit
    100     UNKNOWN           // Unknown token or we ran out of the stream.
    101   };
    102 
    103   enum ParseType {  // Pending parser states; these are the entries of stack_.
    104     VALUE,        // Expects a {, [, true, false, null, string or number
    105     OBJ_MID,      // Expects a ',' or }
    106     ENTRY,        // Expects a key or }
    107     ENTRY_MID,    // Expects a :
    108     ARRAY_VALUE,  // Expects a value or ]
    109     ARRAY_MID     // Expects a ',' or ]
    110   };
    111 
    112   // Result of parsing a number; `type` presumably names the valid union member.
    113   struct NumberResult {
    114     enum Type { DOUBLE, INT, UINT };
    115     Type type;
    116     union {
    117       double double_val;
    118       int64 int_val;
    119       uint64 uint_val;
    120     };
    121   };
    122 
    123   // Parses a single chunk of JSON, returning an error if the JSON was invalid.
    124   util::Status ParseChunk(StringPiece json);
    125 
    126   // Runs the parser based on stack_ and p_, until the stack is empty or p_ runs
    127   // out of data. If we unexpectedly run out of p_ we push the latest ParseType
    128   // back onto the stack and return.
    129   util::Status RunParser();
    130 
    131   // Parses a value from p_ and writes it to ow_.
    132   // A value may be an object, array, true, false, null, string or number.
    133   util::Status ParseValue(TokenType type);
    134 
    135   // Parses a string and writes it out to the ow_.
    136   util::Status ParseString();
    137 
    138   // Parses a string, storing the result in parsed_.
    139   util::Status ParseStringHelper();
    140 
    141   // Parses a unicode escape sequence inside a string. Returns an error when
    142   // the escape is not the expected size or a character is not a hex digit.
    143   // NOTE(review): this comment used to say "str" holds what was parsed so far;
    144   // no such parameter exists -- presumably parsed_storage_ is meant; confirm.
    145   util::Status ParseUnicodeEscape();
    146 
    147   // Expects p_ to point to a JSON number, writes the number to the writer using
    148   // the appropriate Render method based on the type of number.
    149   util::Status ParseNumber();
    150 
    151   // Parse a number into a NumberResult, reporting an error if no number could
    152   // be parsed. This method will try to parse into a uint64, int64, or double
    153   // based on whether the number was positive or negative or had a decimal
    154   // component.
    155   util::Status ParseNumberHelper(NumberResult* result);
    156 
    157   // Handles a { during parsing of a value.
    158   util::Status HandleBeginObject();
    159 
    160   // Parses from the ENTRY state.
    161   util::Status ParseEntry(TokenType type);
    162 
    163   // Parses from the ENTRY_MID state.
    164   util::Status ParseEntryMid(TokenType type);
    165 
    166   // Parses from the OBJ_MID state.
    167   util::Status ParseObjectMid(TokenType type);
    168 
    169   // Handles a [ during parsing of a value.
    170   util::Status HandleBeginArray();
    171 
    172   // Parses from the ARRAY_VALUE state.
    173   util::Status ParseArrayValue(TokenType type);
    174 
    175   // Parses from the ARRAY_MID state.
    176   util::Status ParseArrayMid(TokenType type);
    177 
    178   // Each expects p_ to point to the corresponding unquoted literal.
    179   util::Status ParseTrue();
    180   util::Status ParseFalse();
    181   util::Status ParseNull();
    182 
    183   // Report a failure as a util::Status.
    184   util::Status ReportFailure(StringPiece message);
    185 
    186   // Report a failure due to an UNKNOWN token type. We check if we hit the
    187   // end of the stream and if we're finishing or not to detect what type of
    188   // status to return in this case.
    189   util::Status ReportUnknown(StringPiece message);
    190 
    191   // Advance p_ past all whitespace or until the end of the string.
    192   void SkipWhitespace();
    193 
    194   // Advance p_ one UTF-8 character
    195   void Advance();
    196 
    197   // Expects p_ to point to the beginning of a key.
    198   util::Status ParseKey();
    199 
    200   // Return the type of the next token at p_.
    201   TokenType GetNextTokenType();
    202 
    203   // The object writer to write parse events to.
    204   ObjectWriter* ow_;
    205 
    206   // The stack of parsing we still need to do. When the stack runs empty we will
    207   // have parsed a single value from the root (e.g. an object or list).
    208   std::stack<ParseType> stack_;
    209 
    210   // Contains any leftover text from a previous chunk that we weren't able to
    211   // fully parse, for example the start of a key or number.
    212   string leftover_;
    213 
    214   // The current chunk of JSON being parsed. Primarily used for providing
    215   // context during error reporting.
    216   StringPiece json_;
    217 
    218   // A view into the current JSON being parsed, used to track location.
    219   StringPiece p_;
    220 
    221   // Stores the last key read, as we separate parsing of keys and values.
    222   StringPiece key_;
    223 
    224   // Storage for key_ if we need to keep ownership, for example between chunks
    225   // or if the key was unescaped from a JSON string.
    226   string key_storage_;
    227 
    228   // True during the FinishParse() call, so we know that any errors are fatal.
    229   // For example an unterminated string will normally result in cancelling and
    230   // retrying during the next chunk, but during FinishParse() it is an error.
    231   bool finishing_;
    232 
    233   // String we parsed during a call to ParseStringHelper().
    234   StringPiece parsed_;
    235 
    236   // Storage for the string we parsed. This may be empty if the string was able
    237   // to be parsed directly from the input.
    238   string parsed_storage_;
    239 
    240   // The character that opened the string, either ' or ".
    241   // A value of 0 indicates that string parsing is not in process.
    242   char string_open_;
    243 
    244   // Storage for the chunk that is being parsed in ParseChunk().
    245   string chunk_storage_;
    246 
    247   // Whether to allow non UTF-8 encoded input and replace invalid code points.
    248   bool coerce_to_utf8_;
    249 
    250   GOOGLE_DISALLOW_IMPLICIT_CONSTRUCTORS(JsonStreamParser);
    251 };
    252 
    253 }  // namespace converter
    254 }  // namespace util
    255 }  // namespace protobuf
    256 
    257 }  // namespace google
    258 #endif  // GOOGLE_PROTOBUF_UTIL_CONVERTER_JSON_STREAM_PARSER_H__
    259