Home | History | Annotate | Download | only in io
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // This file contains the CodedInputStream and CodedOutputStream classes,
     36 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
     37 // and allow you to read or write individual pieces of data in various
     38 // formats.  In particular, these implement the varint encoding for
     39 // integers, a simple variable-length encoding in which smaller numbers
     40 // take fewer bytes.
     41 //
     42 // Typically these classes will only be used internally by the protocol
     43 // buffer library in order to encode and decode protocol buffers.  Clients
     44 // of the library only need to know about these classes if they wish to write
     45 // custom message parsing or serialization procedures.
     46 //
     47 // CodedOutputStream example:
     48 //   // Write some data to "myfile".  First we write a 4-byte "magic number"
     49 //   // to identify the file type, then write a length-delimited string.  The
     50 //   // string is composed of a varint giving the length followed by the raw
     51 //   // bytes.
     52 //   int fd = open("myfile", O_CREAT | O_WRONLY);
     53 //   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
     54 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
     55 //
     56 //   int magic_number = 1234;
     57 //   char text[] = "Hello world!";
     58 //   coded_output->WriteLittleEndian32(magic_number);
     59 //   coded_output->WriteVarint32(strlen(text));
     60 //   coded_output->WriteRaw(text, strlen(text));
     61 //
     62 //   delete coded_output;
     63 //   delete raw_output;
     64 //   close(fd);
     65 //
     66 // CodedInputStream example:
     67 //   // Read a file created by the above code.
     68 //   int fd = open("myfile", O_RDONLY);
     69 //   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
     70 //   CodedInputStream* coded_input = new CodedInputStream(raw_input);
     71 //
     72 //   coded_input->ReadLittleEndian32(&magic_number);
     73 //   if (magic_number != 1234) {
     74 //     cerr << "File not in expected format." << endl;
     75 //     return;
     76 //   }
     77 //
     78 //   uint32 size;
     79 //   coded_input->ReadVarint32(&size);
     80 //
     81 //   char* text = new char[size + 1];
     82 //   coded_input->ReadRaw(text, size);
     83 //   text[size] = '\0';
     84 //
     85 //   delete coded_input;
     86 //   delete raw_input;
     87 //   close(fd);
     88 //
     89 //   cout << "Text is: " << text << endl;
     90 //   delete [] text;
     91 //
     92 // For those who are interested, varint encoding is defined as follows:
     93 //
     94 // The encoding operates on unsigned integers of up to 64 bits in length.
     95 // Each byte of the encoded value has the format:
     96 // * bits 0-6: Seven bits of the number being encoded.
     97 // * bit 7: Zero if this is the last byte in the encoding (in which
     98 //   case all remaining bits of the number are zero) or 1 if
     99 //   more bytes follow.
    100 // The first byte contains the least-significant 7 bits of the number, the
    101 // second byte (if present) contains the next-least-significant 7 bits,
    102 // and so on.  So, the binary number 1011000101011 would be encoded in two
    103 // bytes as "10101011 00101100".
    104 //
    105 // In theory, varint could be used to encode integers of any length.
    106 // However, for practicality we set a limit at 64 bits.  The maximum encoded
    107 // length of a number is thus 10 bytes.
    108 
    109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
    110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
    111 
    112 #include <assert.h>
    113 #include <string>
    114 #include <utility>
    115 #ifdef _MSC_VER
    116   // Assuming windows is always little-endian.
    117   #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
    118     #define PROTOBUF_LITTLE_ENDIAN 1
    119   #endif
    120   #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
    121     // If MSVC has "/RTCc" set, it will complain about truncating casts at
    122     // runtime.  This file contains some intentional truncating casts.
    123     #pragma runtime_checks("c", off)
    124   #endif
    125 #else
    126   #include <sys/param.h>   // __BYTE_ORDER
    127   #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
    128          (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
    129       !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
    130     #define PROTOBUF_LITTLE_ENDIAN 1
    131   #endif
    132 #endif
    133 #include <google/protobuf/stubs/common.h>
    134 
    135 namespace google {
    136 
    137 namespace protobuf {
    138 
    139 class DescriptorPool;
    140 class MessageFactory;
    141 
    142 namespace io {
    143 
    144 // Defined in this file.
    145 class CodedInputStream;
    146 class CodedOutputStream;
    147 
    148 // Defined in other files.
    149 class ZeroCopyInputStream;           // zero_copy_stream.h
    150 class ZeroCopyOutputStream;          // zero_copy_stream.h
    151 
    152 // Class which reads and decodes binary data which is composed of varint-
    153 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyInputStream.
    154 // Most users will not need to deal with CodedInputStream.
    155 //
    156 // Most methods of CodedInputStream that return a bool return false if an
    157 // underlying I/O error occurs or if the data is malformed.  Once such a
    158 // failure occurs, the CodedInputStream is broken and is no longer useful.
    159 class LIBPROTOBUF_EXPORT CodedInputStream {
    160  public:
    161   // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
    162   explicit CodedInputStream(ZeroCopyInputStream* input);
    163 
    164   // Create a CodedInputStream that reads from the given flat array.  This is
    165   // faster than using an ArrayInputStream.  PushLimit(size) is implied by
    166   // this constructor.
    167   explicit CodedInputStream(const uint8* buffer, int size);
    168 
    169   // Destroy the CodedInputStream and position the underlying
    170   // ZeroCopyInputStream at the first unread byte.  If an error occurred while
    171   // reading (causing a method to return false), then the exact position of
    172   // the input stream may be anywhere between the last value that was read
    173   // successfully and the stream's byte limit.
    174   ~CodedInputStream();
    175 
    176   // Return true if this CodedInputStream reads from a flat array instead of
    177   // a ZeroCopyInputStream.
    178   inline bool IsFlat() const;
    179 
    180   // Skips a number of bytes.  Returns false if an underlying read error
    181   // occurs.
    182   bool Skip(int count);
    183 
    184   // Sets *data to point directly at the unread part of the CodedInputStream's
    185   // underlying buffer, and *size to the size of that buffer, but does not
    186   // advance the stream's current position.  This will always either produce
    187   // a non-empty buffer or return false.  If the caller consumes any of
    188   // this data, it should then call Skip() to skip over the consumed bytes.
    189   // This may be useful for implementing external fast parsing routines for
    190   // types of data not covered by the CodedInputStream interface.
    191   bool GetDirectBufferPointer(const void** data, int* size);
    192 
    193   // Like GetDirectBufferPointer, but this method is inlined, and does not
    194   // attempt to Refresh() if the buffer is currently empty.
    195   GOOGLE_ATTRIBUTE_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data,
    196                                                             int* size);
    197 
    198   // Read raw bytes, copying them into the given buffer.
    199   bool ReadRaw(void* buffer, int size);
    200 
    201   // Like the above, with inlined optimizations. This should only be used
    202   // by the protobuf implementation.
    203   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadRawInline(void* buffer, int size);
    204 
    205   // Like ReadRaw, but reads into a string.
    206   //
    207   // Implementation Note:  ReadString() grows the string gradually as it
    208   // reads in the data, rather than allocating the entire requested size
    209   // upfront.  This prevents denial-of-service attacks in which a client
    210   // could claim that a string is going to be MAX_INT bytes long in order to
    211   // crash the server because it can't allocate this much space at once.
    212   bool ReadString(string* buffer, int size);
    213   // Like the above, with inlined optimizations. This should only be used
    214   // by the protobuf implementation.
    215   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadStringInline(string* buffer,
    216                                                         int size);
    217 
    218 
    219   // Read a 32-bit little-endian integer.
    220   bool ReadLittleEndian32(uint32* value);
    221   // Read a 64-bit little-endian integer.
    222   bool ReadLittleEndian64(uint64* value);
    223 
    224   // These methods read from an externally provided buffer. The caller is
    225   // responsible for ensuring that the buffer has sufficient space.
    226   // Read a 32-bit little-endian integer.
    227   static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
    228                                                    uint32* value);
    229   // Read a 64-bit little-endian integer.
    230   static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
    231                                                    uint64* value);
    232 
    233   // Read an unsigned integer with Varint encoding, truncating to 32 bits.
    234   // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
    235   // it to uint32, but may be more efficient.
    236   bool ReadVarint32(uint32* value);
    237   // Read an unsigned integer with Varint encoding.
    238   bool ReadVarint64(uint64* value);
    239 
    240   // Read a tag.  This calls ReadVarint32() and returns the result, or returns
    241   // zero (which is not a valid tag) if ReadVarint32() fails.  Also, it updates
    242   // the last tag value, which can be checked with LastTagWas().
    243   // Always inline because this is only called in one place per parse loop
    244   // but it is called for every iteration of said loop, so it should be fast.
    245   // GCC doesn't want to inline this by default.
    246   GOOGLE_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag();
    247 
    248   // This is usually a faster alternative to ReadTag() when cutoff is a manifest
    249   // constant.  It does particularly well for cutoff >= 127.  The first part
    250   // of the return value is the tag that was read, though it can also be 0 in
    251   // the cases where ReadTag() would return 0.  If the second part is true
    252   // then the tag is known to be in [0, cutoff].  If not, the tag either is
    253   // above cutoff or is 0.  (There's intentional wiggle room when tag is 0,
    254   // because that can arise in several ways, and for best performance we want
    255   // to avoid an extra "is tag == 0?" check here.)
    256   GOOGLE_ATTRIBUTE_ALWAYS_INLINE std::pair<uint32, bool> ReadTagWithCutoff(
    257       uint32 cutoff);
    258 
    259   // Usually returns true if calling ReadVarint32() now would produce the given
    260   // value.  Will always return false if ReadVarint32() would not return the
    261   // given value.  If ExpectTag() returns true, it also advances past
    262   // the varint.  For best performance, use a compile-time constant as the
    263   // parameter.
    264   // Always inline because this collapses to a small number of instructions
    265   // when given a constant parameter, but GCC doesn't want to inline by default.
    266   GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
    267 
    268   // Like above, except this reads from the specified buffer. The caller is
    269   // responsible for ensuring that the buffer is large enough to read a varint
    270   // of the expected size. For best performance, use a compile-time constant as
    271   // the expected tag parameter.
    272   //
    273   // Returns a pointer beyond the expected tag if it was found, or NULL if it
    274   // was not.
    275   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static const uint8* ExpectTagFromArray(
    276       const uint8* buffer,
    277       uint32 expected);
    278 
    279   // Usually returns true if no more bytes can be read.  Always returns false
    280   // if more bytes can be read.  If ExpectAtEnd() returns true, a subsequent
    281   // call to LastTagWas() will act as if ReadTag() had been called and returned
    282   // zero, and ConsumedEntireMessage() will return true.
    283   bool ExpectAtEnd();
    284 
    285   // If the last call to ReadTag() or ReadTagWithCutoff() returned the
    286   // given value, returns true.  Otherwise, returns false.
    287   //
    288   // This is needed because parsers for some types of embedded messages
    289   // (with field type TYPE_GROUP) don't actually know that they've reached the
    290   // end of a message until they see an ENDGROUP tag, which was actually part
    291   // of the enclosing message.  The enclosing message would like to check that
    292   // tag to make sure it had the right number, so it calls LastTagWas() on
    293   // return from the embedded parser to check.
    294   bool LastTagWas(uint32 expected);
    295 
    296   // When parsing a message (but NOT a group), this method must be called
    297   // immediately after MergeFromCodedStream() returns (if it returns true)
    298   // to further verify that the message ended in a legitimate way.  For
    299   // example, this verifies that parsing did not end on an end-group tag.
    300   // It also checks for some cases where, due to optimizations,
    301   // MergeFromCodedStream() can incorrectly return true.
    302   bool ConsumedEntireMessage();
    303 
    304   // Limits ----------------------------------------------------------
    305   // Limits are used when parsing length-delimited embedded messages.
    306   // After the message's length is read, PushLimit() is used to prevent
    307   // the CodedInputStream from reading beyond that length.  Once the
    308   // embedded message has been parsed, PopLimit() is called to undo the
    309   // limit.
    310 
    311   // Opaque type used with PushLimit() and PopLimit().  Do not modify
    312   // values of this type yourself.  The only reason that this isn't a
    313   // struct with private internals is for efficiency.
    314   typedef int Limit;
    315 
    316   // Places a limit on the number of bytes that the stream may read,
    317   // starting from the current position.  Once the stream hits this limit,
    318   // it will act like the end of the input has been reached until PopLimit()
    319   // is called.
    320   //
    321   // As the names imply, the stream conceptually has a stack of limits.  The
    322   // shortest limit on the stack is always enforced, even if it is not the
    323   // top limit.
    324   //
    325   // The value returned by PushLimit() is opaque to the caller, and must
    326   // be passed unchanged to the corresponding call to PopLimit().
    327   Limit PushLimit(int byte_limit);
    328 
    329   // Pops the last limit pushed by PushLimit().  The input must be the value
    330   // returned by that call to PushLimit().
    331   void PopLimit(Limit limit);
    332 
    333   // Returns the number of bytes left until the nearest limit on the
    334   // stack is hit, or -1 if no limits are in place.
    335   int BytesUntilLimit() const;
    336 
    337   // Returns current position relative to the beginning of the input stream.
    338   int CurrentPosition() const;
    339 
    340   // Total Bytes Limit -----------------------------------------------
    341   // To prevent malicious users from sending excessively large messages
    342   // and causing integer overflows or memory exhaustion, CodedInputStream
    343   // imposes a hard limit on the total number of bytes it will read.
    344 
    345   // Sets the maximum number of bytes that this CodedInputStream will read
    346   // before refusing to continue.  To prevent integer overflows in the
    347   // protocol buffers implementation, as well as to prevent servers from
    348   // allocating enormous amounts of memory to hold parsed messages, the
    349   // maximum message length should be limited to the shortest length that
    350   // will not harm usability.  The theoretical shortest message that could
    351   // cause integer overflows is 512MB.  The default limit is 64MB.  Apps
    352   // should set shorter limits if possible.  If warning_threshold is not -1,
    353   // a warning will be printed to stderr after warning_threshold bytes are
    354   // read.  For backwards compatibility all negative values get squashed to -1,
    355   // as other negative values might have special internal meanings.
    356   // An error will always be printed to stderr if the limit is reached.
    357   //
    358   // This is unrelated to PushLimit()/PopLimit().
    359   //
    360   // Hint:  If you are reading this because your program is printing a
    361   //   warning about dangerously large protocol messages, you may be
    362   //   confused about what to do next.  The best option is to change your
    363   //   design such that excessively large messages are not necessary.
    364   //   For example, try to design file formats to consist of many small
    365   //   messages rather than a single large one.  If this is infeasible,
    366   //   you will need to increase the limit.  Chances are, though, that
    367   //   your code never constructs a CodedInputStream on which the limit
    368   //   can be set.  You probably parse messages by calling things like
    369   //   Message::ParseFromString().  In this case, you will need to change
    370   //   your code to instead construct some sort of ZeroCopyInputStream
    371   //   (e.g. an ArrayInputStream), construct a CodedInputStream around
    372   //   that, then call Message::ParseFromCodedStream() instead.  Then
    373   //   you can adjust the limit.  Yes, it's more work, but you're doing
    374   //   something unusual.
    375   void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
    376 
    377   // The Total Bytes Limit minus the Current Position, or -1 if there
    378   // is no Total Bytes Limit.
    379   int BytesUntilTotalBytesLimit() const;
    380 
    381   // Recursion Limit -------------------------------------------------
    382   // To prevent corrupt or malicious messages from causing stack overflows,
    383   // we must keep track of the depth of recursion when parsing embedded
    384   // messages and groups.  CodedInputStream keeps track of this because it
    385   // is the only object that is passed down the stack during parsing.
    386 
    387   // Sets the maximum recursion depth.  The default is 100.
    388   void SetRecursionLimit(int limit);
    389 
    390 
    391   // Increments the current recursion depth.  Returns true if the depth is
    392   // under the limit, false if it has gone over.
    393   bool IncrementRecursionDepth();
    394 
    395   // Decrements the recursion depth if possible.
    396   void DecrementRecursionDepth();
    397 
    398   // Decrements the recursion depth blindly.  This is faster than
    399   // DecrementRecursionDepth().  It should be used only if all previous
    400   // increments to recursion depth were successful.
    401   void UnsafeDecrementRecursionDepth();
    402 
    403   // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
    404   // Using this can reduce code size and complexity in some cases.  The caller
    405   // is expected to check that the second part of the result is non-negative (to
    406   // bail out if the depth of recursion is too high) and, if all is well, to
    407   // later pass the first part of the result to PopLimit() or similar.
    408   std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
    409       int byte_limit);
    410 
    411   // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
    412   Limit ReadLengthAndPushLimit();
    413 
    414   // Helper that is equivalent to: {
    415   //  bool result = ConsumedEntireMessage();
    416   //  PopLimit(limit);
    417   //  UnsafeDecrementRecursionDepth();
    418   //  return result; }
    419   // Using this can reduce code size and complexity in some cases.
    420   // Do not use unless the current recursion depth is greater than zero.
    421   bool DecrementRecursionDepthAndPopLimit(Limit limit);
    422 
    423   // Helper that is equivalent to: {
    424   //  bool result = ConsumedEntireMessage();
    425   //  PopLimit(limit);
    426   //  return result; }
    427   // Using this can reduce code size and complexity in some cases.
    428   bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
    429 
    430   // Extension Registry ----------------------------------------------
    431   // ADVANCED USAGE:  99.9% of people can ignore this section.
    432   //
    433   // By default, when parsing extensions, the parser looks for extension
    434   // definitions in the pool which owns the outer message's Descriptor.
    435   // However, you may call SetExtensionRegistry() to provide an alternative
    436   // pool instead.  This makes it possible, for example, to parse a message
    437   // using a generated class, but represent some extensions using
    438   // DynamicMessage.
    439 
    440   // Set the pool used to look up extensions.  Most users do not need to call
    441   // this as the correct pool will be chosen automatically.
    442   //
    443   // WARNING:  It is very easy to misuse this.  Carefully read the requirements
    444   //   below.  Do not use this unless you are sure you need it.  Almost no one
    445   //   does.
    446   //
    447   // Let's say you are parsing a message into message object m, and you want
    448   // to take advantage of SetExtensionRegistry().  You must follow these
    449   // requirements:
    450   //
    451   // The given DescriptorPool must contain m->GetDescriptor().  It is not
    452   // sufficient for it to simply contain a descriptor that has the same name
    453   // and content -- it must be the *exact object*.  In other words:
    454   //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
    455   //          m->GetDescriptor());
    456   // There are two ways to satisfy this requirement:
    457   // 1) Use m->GetDescriptor()->pool() as the pool.  This is generally useless
    458   //    because this is the pool that would be used anyway if you didn't call
    459   //    SetExtensionRegistry() at all.
    460   // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
    461   //    "underlay".  Read the documentation for DescriptorPool for more
    462   //    information about underlays.
    463   //
    464   // You must also provide a MessageFactory.  This factory will be used to
    465   // construct Message objects representing extensions.  The factory's
    466   // GetPrototype() MUST return non-NULL for any Descriptor which can be found
    467   // through the provided pool.
    468   //
    469   // If the provided factory might return instances of protocol-compiler-
    470   // generated (i.e. compiled-in) types, or if the outer message object m is
    471   // a generated type, then the given factory MUST have this property:  If
    472   // GetPrototype() is given a Descriptor which resides in
    473   // DescriptorPool::generated_pool(), the factory MUST return the same
    474   // prototype which MessageFactory::generated_factory() would return.  That
    475   // is, given a descriptor for a generated type, the factory must return an
    476   // instance of the generated class (NOT DynamicMessage).  However, when
    477   // given a descriptor for a type that is NOT in generated_pool, the factory
    478   // is free to return any implementation.
    479   //
    480   // The reason for this requirement is that generated sub-objects may be
    481   // accessed via the standard (non-reflection) extension accessor methods,
    482   // and these methods will down-cast the object to the generated class type.
    483   // If the object is not actually of that type, the results would be undefined.
    484   // On the other hand, if an extension is not compiled in, then there is no
    485   // way the code could end up accessing it via the standard accessors -- the
    486   // only way to access the extension is via reflection.  When using reflection,
    487   // DynamicMessage and generated messages are indistinguishable, so it's fine
    488   // if these objects are represented using DynamicMessage.
    489   //
    490   // Using DynamicMessageFactory on which you have called
    491   // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
    492   // above requirement.
    493   //
    494   // If either pool or factory is NULL, both must be NULL.
    495   //
    496   // Note that this feature is ignored when parsing "lite" messages as they do
    497   // not have descriptors.
    498   void SetExtensionRegistry(const DescriptorPool* pool,
    499                             MessageFactory* factory);
    500 
    501   // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
    502   // has been provided.
    503   const DescriptorPool* GetExtensionPool();
    504 
    505   // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
    506   // factory has been provided.
    507   MessageFactory* GetExtensionFactory();
    508 
    509  private:
    510   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
    511 
    512   const uint8* buffer_;
    513   const uint8* buffer_end_;     // pointer to the end of the buffer.
    514   ZeroCopyInputStream* input_;
    515   int total_bytes_read_;  // total bytes read from input_, including
    516                           // the current buffer
    517 
    518   // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
    519   // so that we can BackUp() on destruction.
    520   int overflow_bytes_;
    521 
    522   // LastTagWas() stuff.
    523   uint32 last_tag_;         // result of last ReadTag() or ReadTagWithCutoff().
    524 
    525   // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
    526   // at EOF, or by ExpectAtEnd() when it returns true.  This happens when we
    527   // reach the end of a message and attempt to read another tag.
    528   bool legitimate_message_end_;
    529 
    530   // See EnableAliasing() (NOTE(review): not declared in this class -- confirm).
    531   bool aliasing_enabled_;
    532 
    533   // Limits
    534   Limit current_limit_;   // if position = -1, no limit is applied
    535 
    536   // For simplicity, if the current buffer crosses a limit (either a normal
    537   // limit created by PushLimit() or the total bytes limit), buffer_size_
    538   // only tracks the number of bytes before that limit.  This field
    539   // contains the number of bytes after it.  Note that this implies that if
    540   // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
    541   // hit a limit.  However, if both are zero, it doesn't necessarily mean
    542   // we aren't at a limit -- the buffer may have ended exactly at the limit.
    543   int buffer_size_after_limit_;
    544 
    545   // Maximum number of bytes to read, period.  This is unrelated to
    546   // current_limit_.  Set using SetTotalBytesLimit().
    547   int total_bytes_limit_;
    548 
    549   // If positive/0: Limit for bytes read after which a warning due to size
    550   // should be logged.
    551   // If -1: Printing of warning disabled. Can be set by client.
    552   // If -2: Internal: Limit has been reached, print full size when destructing.
    553   int total_bytes_warning_threshold_;
    554 
    555   // Current recursion budget, controlled by IncrementRecursionDepth() and
    556   // similar.  Starts at recursion_limit_ and goes down: if this reaches
    557   // -1 we are over budget.
    558   int recursion_budget_;
    559   // Recursion depth limit, set by SetRecursionLimit().
    560   int recursion_limit_;
    561 
    562   // See SetExtensionRegistry().
    563   const DescriptorPool* extension_pool_;
    564   MessageFactory* extension_factory_;
    565 
    566   // Private member functions.
    567 
    568   // Advance the buffer by a given number of bytes.
    569   void Advance(int amount);
    570 
    571   // Back up input_ to the current buffer position.
    572   void BackUpInputToCurrentPosition();
    573 
    574   // Recomputes the value of buffer_size_after_limit_.  Must be called after
    575   // current_limit_ or total_bytes_limit_ changes.
    576   void RecomputeBufferLimits();
    577 
    578   // Writes an error message saying that we hit total_bytes_limit_.
    579   void PrintTotalBytesLimitError();
    580 
    581   // Called when the buffer runs out to request more data.  Implies an
    582   // Advance(BufferSize()).
    583   bool Refresh();
    584 
    585   // When parsing varints, we optimize for the common case of small values, and
    586   // then optimize for the case when the varint fits within the current buffer
    587   // piece. The Fallback method is used when we can't use the one-byte
    588   // optimization. The Slow method is yet another fallback when the buffer is
    589   // not large enough. Making the slow path out-of-line speeds up the common
    590   // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
    591   // message crosses multiple buffers.  Note: ReadVarint32Fallback() and
    592   // ReadVarint64Fallback() are called frequently and generally not inlined, so
    593   // they have been optimized to avoid "out" parameters.  The former returns -1
    594   // if it fails and the uint32 it read otherwise.  The latter has a bool
    595   // indicating success or failure as part of its return type.
    596   int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
    597   std::pair<uint64, bool> ReadVarint64Fallback();
    598   bool ReadVarint32Slow(uint32* value);
    599   bool ReadVarint64Slow(uint64* value);
    600   bool ReadLittleEndian32Fallback(uint32* value);
    601   bool ReadLittleEndian64Fallback(uint64* value);
    602   // Fallback/slow methods for reading tags. These do not update last_tag_,
    603   // but will set legitimate_message_end_ if we are at the end of the input
    604   // stream.
    605   uint32 ReadTagFallback(uint32 first_byte_or_zero);
    606   uint32 ReadTagSlow();
    607   bool ReadStringFallback(string* buffer, int size);
    608 
    609   // Return the size of the buffer.
    610   int BufferSize() const;
    611 
    612   static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB
    613 
    614   static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
    615 
    616   static int default_recursion_limit_;  // 100 by default.
    617 };
    618 
    619 // Class which encodes and writes binary data which is composed of varint-
    620 // encoded integers and fixed-width pieces.  Wraps a ZeroCopyOutputStream.
    621 // Most users will not need to deal with CodedOutputStream.
    622 //
    623 // Most methods of CodedOutputStream which return a bool return false if an
    624 // underlying I/O error occurs.  Once such a failure occurs, the
    625 // CodedOutputStream is broken and is no longer useful. The Write* methods do
    626 // not return the stream status, but will invalidate the stream if an error
    627 // occurs. The client can probe HadError() to determine the status.
    628 //
    629 // Note that every method of CodedOutputStream which writes some data has
    630 // a corresponding static "ToArray" version. These versions write directly
    631 // to the provided buffer, returning a pointer past the last written byte.
    632 // They require that the buffer has sufficient capacity for the encoded data.
    633 // This allows an optimization where we check if an output stream has enough
    634 // space for an entire message before we start writing and, if there is, we
    635 // call only the ToArray methods to avoid doing bound checks for each
    636 // individual value.
    637 // i.e., in the example above:
    638 //
    639 //   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
    640 //   int magic_number = 1234;
    641 //   char text[] = "Hello world!";
    642 //
    643 //   int coded_size = sizeof(magic_number) +
    644 //                    CodedOutputStream::VarintSize32(strlen(text)) +
    645 //                    strlen(text);
    646 //
    647 //   uint8* buffer =
    648 //       coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
    649 //   if (buffer != NULL) {
    650 //     // The output stream has enough space in the buffer: write directly to
    651 //     // the array.
    652 //     buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
    653 //                                                            buffer);
    654 //     buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
    655 //     buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
    656 //   } else {
    657 //     // Make bound-checked writes, which will ask the underlying stream for
    658 //     // more space as needed.
    659 //     coded_output->WriteLittleEndian32(magic_number);
    660 //     coded_output->WriteVarint32(strlen(text));
    661 //     coded_output->WriteRaw(text, strlen(text));
    662 //   }
    663 //
    664 //   delete coded_output;
    665 class LIBPROTOBUF_EXPORT CodedOutputStream {
    666  public:
    667   // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
    668   explicit CodedOutputStream(ZeroCopyOutputStream* output);
    669   CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
    670 
    671   // Destroy the CodedOutputStream and position the underlying
    672   // ZeroCopyOutputStream immediately after the last byte written.
    673   ~CodedOutputStream();
    674 
    675   // Trims any unused space in the underlying buffer so that its size matches
    676   // the number of bytes written by this stream. The underlying buffer will
    677   // automatically be trimmed when this stream is destroyed; this call is only
    678   // necessary if the underlying buffer is accessed *before* the stream is
    679   // destroyed.
    680   void Trim();
    681 
    682   // Skips a number of bytes, leaving the bytes unmodified in the underlying
    683   // buffer.  Returns false if an underlying write error occurs.  This is
    684   // mainly useful with GetDirectBufferPointer().
    685   bool Skip(int count);
    686 
    687   // Sets *data to point directly at the unwritten part of the
    688   // CodedOutputStream's underlying buffer, and *size to the size of that
    689   // buffer, but does not advance the stream's current position.  This will
    690   // always either produce a non-empty buffer or return false.  If the caller
    691   // writes any data to this buffer, it should then call Skip() to skip over
    692   // the consumed bytes.  This may be useful for implementing external fast
    693   // serialization routines for types of data not covered by the
    694   // CodedOutputStream interface.
    695   bool GetDirectBufferPointer(void** data, int* size);
    696 
    697   // If there are at least "size" bytes available in the current buffer,
    698   // returns a pointer directly into the buffer and advances over these bytes.
    699   // The caller may then write directly into this buffer (e.g. using the
    700   // *ToArray static methods) rather than go through CodedOutputStream.  If
    701   // there are not enough bytes available, returns NULL.  The returned pointer
    702   // is invalidated as soon as any other non-const method of CodedOutputStream
    703   // is called.
    704   inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
    705 
    706   // Write raw bytes, copying them from the given buffer.
    707   void WriteRaw(const void* buffer, int size);
    708   // Like WriteRaw(), but will try to write aliased data if aliasing is
    709   // turned on.
    710   void WriteRawMaybeAliased(const void* data, int size);
    711   // Like WriteRaw(), but writing directly to the target array.
    712   // This is _not_ inlined, as the compiler often optimizes memcpy into inline
    713   // copy loops. Since this gets called by every field with string or bytes
    714   // type, inlining may lead to a significant amount of code bloat, with only a
    715   // minor performance gain.
    716   static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
    717 
    718   // Equivalent to WriteRaw(str.data(), str.size()).
    719   void WriteString(const string& str);
    720   // Like WriteString(), but writing directly to the target array.
    721   static uint8* WriteStringToArray(const string& str, uint8* target);
    722   // Write the varint-encoded size of str followed by str.
    723   static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
    724 
    725 
    726   // Instructs the CodedOutputStream to allow the underlying
    727   // ZeroCopyOutputStream to hold pointers to the original structure instead of
    728   // copying, if it supports it (i.e. output->AllowsAliasing() is true).  If the
    729   // underlying stream does not support aliasing, then enabling it has no
    730   // effect.  For now, this only affects the behavior of
    731   // WriteRawMaybeAliased().
    732   //
    733   // NOTE: It is caller's responsibility to ensure that the chunk of memory
    734   // remains live until all of the data has been consumed from the stream.
    735   void EnableAliasing(bool enabled);
    736 
    737   // Write a 32-bit little-endian integer.
    738   void WriteLittleEndian32(uint32 value);
    739   // Like WriteLittleEndian32(), but writing directly to the target array.
    740   static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
    741   // Write a 64-bit little-endian integer.
    742   void WriteLittleEndian64(uint64 value);
    743   // Like WriteLittleEndian64(), but writing directly to the target array.
    744   static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
    745 
    746   // Write an unsigned integer with Varint encoding.  Writing a 32-bit value
    747   // is equivalent to casting it to uint64 and writing it as a 64-bit value,
    748   // but may be more efficient.
    749   void WriteVarint32(uint32 value);
    750   // Like WriteVarint32(), but writing directly to the target array.
    751   static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
    752   // Write an unsigned integer with Varint encoding.
    753   void WriteVarint64(uint64 value);
    754   // Like WriteVarint64(), but writing directly to the target array.
    755   static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
    756 
    757   // Equivalent to WriteVarint32() except when the value is negative,
    758   // in which case it must be sign-extended to a full 10 bytes.
    759   void WriteVarint32SignExtended(int32 value);
    760   // Like WriteVarint32SignExtended(), but writing directly to the target array.
    761   static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
    762 
    763   // This is identical to WriteVarint32(), but optimized for writing tags.
    764   // In particular, if the input is a compile-time constant, this method
    765   // compiles down to a couple instructions.
    766   // Always inline because otherwise the aforementioned optimization can't work,
    767   // but GCC by default doesn't want to inline this.
    768   void WriteTag(uint32 value);
    769   // Like WriteTag(), but writing directly to the target array.
    770   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value,
    771                                                         uint8* target);
    772 
    773   // Returns the number of bytes needed to encode the given value as a varint.
    774   static int VarintSize32(uint32 value);
    775   // Returns the number of bytes needed to encode the given value as a varint.
    776   static int VarintSize64(uint64 value);
    777 
    778   // If negative, 10 bytes.  Otherwise, same as VarintSize32().
    779   static int VarintSize32SignExtended(int32 value);
    780 
    781   // Compile-time equivalent of VarintSize32(): 1 to 5 bytes, by magnitude.
    782   template <uint32 Value>
    783   struct StaticVarintSize32 {
    784     static const int value =
    785         (Value < (1 << 7))
    786             ? 1
    787             : (Value < (1 << 14))
    788                 ? 2
    789                 : (Value < (1 << 21))
    790                     ? 3
    791                     : (Value < (1 << 28))
    792                         ? 4
    793                         : 5;
    794   };
    795 
    796   // Returns the total number of bytes written since this object was created.
    797   inline int ByteCount() const;
    798 
    799   // Returns true if there was an underlying I/O error since this object was
    800   // created.
    801   bool HadError() const { return had_error_; }
    802 
    803  private:
    804   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
    805 
    806   ZeroCopyOutputStream* output_;  // Presumably the wrapped stream; confirm.
    807   uint8* buffer_;      // Current write position; moved forward by Advance().
    808   int buffer_size_;    // Bytes left at buffer_; reduced by Advance().
    809   int total_bytes_;  // Sum of sizes of all buffers seen so far.
    810   bool had_error_;   // Whether an error occurred during output.
    811   bool aliasing_enabled_;  // See EnableAliasing().
    812 
    813   // Advance the buffer by a given number of bytes.
    814   void Advance(int amount);
    815 
    816   // Called when the buffer runs out to request more data.  Implies an
    817   // Advance(buffer_size_).
    818   bool Refresh();
    819 
    820   // Like WriteRaw() but may avoid copying if the underlying
    821   // ZeroCopyOutputStream supports it.
    822   void WriteAliasedRaw(const void* buffer, int size);
    823 
    824   // If this write might cross the end of the buffer, we compose the bytes first
    825   // then use WriteRaw().
    826   void WriteVarint32SlowPath(uint32 value);
    827 
    828   // Always-inlined versions of WriteVarint* functions so that code can be
    829   // reused, while still controlling size: a caller that is itself inlined
    830   // should not call this directly, since doing so would greatly increase the
    831   // size of generated code, whereas an out-of-line caller can invoke this
    832   // directly to avoid any extra function call overhead.
    833   // NOTE(review): only the 64-bit variant is declared here; the 32-bit
    834   // fallback this comment used to reference is not declared in this class.
    835   GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteVarint64ToArrayInline(
    836       uint64 value, uint8* target);
    837 
    838   static int VarintSize32Fallback(uint32 value);
    839 };
    840 
    841 // inline methods ====================================================
    842 // The vast majority of varints are only one byte.  These inline
    843 // methods optimize for that case.
    844 
    845 inline bool CodedInputStream::ReadVarint32(uint32* value) {
    846   uint32 first_byte_or_zero = 0;  // Stays 0 when the buffer is empty.
    847   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
    848     first_byte_or_zero = *buffer_;
    849     if (first_byte_or_zero < 0x80) {  // Fast path: one-byte varint.
    850       *value = first_byte_or_zero;
    851       Advance(1);
    852       return true;
    853     }
    854   }
    855   const int64 decoded = ReadVarint32Fallback(first_byte_or_zero);
    856   *value = static_cast<uint32>(decoded);
    857   return decoded >= 0;  // The fallback reports failure as a negative value.
    858 }
    859 
    860 inline bool CodedInputStream::ReadVarint64(uint64* value) {
    861   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
    862     *value = *buffer_;  // Fast path: the varint is a single byte.
    863     Advance(1);
    864     return true;
    865   }
    866   const std::pair<uint64, bool> decoded = ReadVarint64Fallback();
    867   *value = decoded.first;
    868   return decoded.second;  // .second is the fallback's success flag.
    869 }
    870 
    871 // static
    872 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
    873     const uint8* buffer, uint32* value) {
    874   // Decodes 4 little-endian bytes at buffer; returns the byte after them.
    875 #if defined(PROTOBUF_LITTLE_ENDIAN)
    876   memcpy(value, buffer, sizeof(*value));
    877 #else
    878   uint32 assembled = 0;
    879   for (int i = 3; i >= 0; --i) {  // Most significant byte first.
    880     assembled = (assembled << 8) | static_cast<uint32>(buffer[i]);
    881   }
    882   *value = assembled;
    883 #endif
    884   return buffer + sizeof(*value);
    885 }
    886 // static
    887 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
    888     const uint8* buffer, uint64* value) {
    889   // Decodes the 8 little-endian bytes at buffer into *value and returns a
    890   // pointer to the byte just past them.
    891 #if defined(PROTOBUF_LITTLE_ENDIAN)
    892   memcpy(value, buffer, sizeof(*value));
    893 #else
    894   // Assemble the low and high 32-bit halves with 32-bit arithmetic, then
    895   // widen and join them.
    896   uint32 low_half = 0;
    897   uint32 high_half = 0;
    898   for (int i = 3; i >= 0; --i) {
    899     low_half = (low_half << 8) | static_cast<uint32>(buffer[i]);
    900     high_half = (high_half << 8) | static_cast<uint32>(buffer[i + 4]);
    901   }
    902   *value = static_cast<uint64>(low_half) |
    903            (static_cast<uint64>(high_half) << 32);
    904 #endif
    905   return buffer + sizeof(*value);
    906 }
    907 
    908 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
    909 #if defined(PROTOBUF_LITTLE_ENDIAN)
    910   // Fast path: all four bytes are already in the current buffer.
    911   const int wanted = static_cast<int>(sizeof(*value));
    912   if (GOOGLE_PREDICT_TRUE(BufferSize() >= wanted)) {
    913     memcpy(value, buffer_, sizeof(*value));
    914     Advance(wanted);
    915     return true;
    916   }
    917 #endif
    918   // Otherwise (or on other byte orders) defer to the out-of-line reader.
    919   return ReadLittleEndian32Fallback(value);
    920 }
    921 
    922 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
    923 #if defined(PROTOBUF_LITTLE_ENDIAN)
    924   // Fast path: all eight bytes are already in the current buffer.
    925   const int wanted = static_cast<int>(sizeof(*value));
    926   if (GOOGLE_PREDICT_TRUE(BufferSize() >= wanted)) {
    927     memcpy(value, buffer_, sizeof(*value));
    928     Advance(wanted);
    929     return true;
    930   }
    931 #endif
    932   // Otherwise (or on other byte orders) defer to the out-of-line reader.
    933   return ReadLittleEndian64Fallback(value);
    934 }
    935 
    936 inline uint32 CodedInputStream::ReadTag() {
    937   uint32 first_byte_or_zero = 0;  // Stays 0 when the buffer is empty.
    938   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
    939     first_byte_or_zero = *buffer_;
    940     if (first_byte_or_zero < 0x80) {  // Fast path: one-byte tag.
    941       last_tag_ = first_byte_or_zero;
    942       Advance(1);
    943       return first_byte_or_zero;
    944     }
    945   }
    946   // The fallback does not record the tag itself, so store it here.
    947   return last_tag_ = ReadTagFallback(first_byte_or_zero);
    948 }
    949 
    950 inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
    951     uint32 cutoff) {
    952   // Reads a tag into last_tag_ and reports whether it is at or below cutoff.
    953   // In performance-sensitive code we can expect cutoff to be a compile-time
    954   // constant, so tests like "cutoff >= kMax1ByteVarint" fold away.
    955   uint32 first_byte_or_zero = 0;
    956   if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
    957     // Hot case: buffer_ non-empty, buffer_[0] in [1, 128).
    958     // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
    959     // is large enough then is it better to check for the two-byte case first?
    960     first_byte_or_zero = buffer_[0];
    961     if (static_cast<int8>(buffer_[0]) > 0) {
    962       const uint32 kMax1ByteVarint = 0x7f;
    963       uint32 tag = last_tag_ = buffer_[0];
    964       Advance(1);
    965       return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
    966     }
    967     // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
    968     // and tag is two bytes.  The latter is tested by bitwise-and-not of the
    969     // first byte and the second byte.
    970     if (cutoff >= 0x80 &&
    971         GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
    972         GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
    973       const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
    974       uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
    975       Advance(2);
    976       // It might make sense to test for tag == 0 now, but it is so rare
    977       // that we don't bother.  A varint-encoded 0 should be one byte unless
    978       // the encoder lost its mind.  The second part of the return value of
    979       // this function is allowed to be either true or false if the tag is 0,
    980       // so we don't have to check for tag == 0.  We may need to check whether
    981       // it exceeds cutoff.
    982       bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
    983       return std::make_pair(tag, at_or_below_cutoff);
    984     }
    985   }
    986   // Slow path
    987   last_tag_ = ReadTagFallback(first_byte_or_zero);
    988   // A zero tag must yield { 0, false }.  For any non-zero tag,
    989   // "last_tag_ - 1 < cutoff" is the same test as "last_tag_ <= cutoff", so
    990   // spell it that way: no wraparound, and no compiler-specific builtin.
    991   const bool at_or_below_cutoff = last_tag_ != 0 && last_tag_ <= cutoff;
    992   return std::make_pair(last_tag_, at_or_below_cutoff);
    993 }
    994 
    995 inline bool CodedInputStream::LastTagWas(uint32 expected) {
    996   return expected == last_tag_;  // Compares with the most recently stored tag.
    997 }
    998 
    999 inline bool CodedInputStream::ConsumedEntireMessage() {
   1000   return legitimate_message_end_;  // Flag set, e.g., by ExpectAtEnd().
   1001 }
   1002 
   1003 inline bool CodedInputStream::ExpectTag(uint32 expected) {
   1004   // Only one- and two-byte tags are matched; anything larger returns false
   1005   // (don't bother optimizing for larger values).  The stream advances only
   1006   // when the bytes at buffer_ match.
   1007   if (expected >= (1 << 14)) return false;
   1008 
   1009   if (expected < (1 << 7)) {
   1010     // One-byte encoding: the tag value itself.
   1011     const bool matches =
   1012         GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected;
   1013     if (matches) Advance(1);
   1014     return matches;
   1015   }
   1016 
   1017   // Two-byte encoding: low seven bits with the continuation bit set, then
   1018   // the remaining bits.
   1019   const bool matches = GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
   1020                        buffer_[0] == static_cast<uint8>(expected | 0x80) &&
   1021                        buffer_[1] == static_cast<uint8>(expected >> 7);
   1022   if (matches) Advance(2);
   1023   return matches;
   1024 }
   1025 
   1026 inline const uint8* CodedInputStream::ExpectTagFromArray(
   1027     const uint8* buffer, uint32 expected) {
   1028   // Returns the position just past the tag on a match, NULL otherwise.
   1029   // No bounds checks are made on buffer here.
   1030   if (expected < (1 << 7)) {
   1031     return (buffer[0] == expected) ? buffer + 1 : NULL;
   1032   }
   1033   if (expected >= (1 << 14)) return NULL;  // Larger tags are never matched.
   1034   // Two-byte form: low seven bits plus continuation flag, then the rest.
   1035   const bool matches =
   1036       buffer[0] == static_cast<uint8>(expected | 0x80) &&
   1037       buffer[1] == static_cast<uint8>(expected >> 7);
   1038   return matches ? buffer + 2 : NULL;
   1039 }
   1040 
   1041 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
   1042                                                            int* size) {
   1043   *data = buffer_;       // Current read position; the stream does not move.
   1044   *size = BufferSize();  // Bytes between buffer_ and buffer_end_.
   1045 }
   1046 
   1047 inline bool CodedInputStream::ExpectAtEnd() {
   1048   // Being at a limit proves that no more bytes can be read.  Anything else
   1049   // would require calling Refresh() to find out, which is avoided here.
   1050   const bool buffer_drained = (buffer_ == buffer_end_);
   1051   const bool at_limit = (buffer_size_after_limit_ != 0) ||
   1052                         (total_bytes_read_ == current_limit_);
   1053   if (!(buffer_drained && at_limit)) {
   1054     return false;
   1055   }
   1056   // Leave the same state a ReadTag() call that hit EOF would leave.
   1057   last_tag_ = 0;
   1058   legitimate_message_end_ = true;
   1059   return true;
   1060 }
   1061 
   1062 inline int CodedInputStream::CurrentPosition() const {  // Read minus unread.
   1063   return total_bytes_read_ - (buffer_size_after_limit_ + BufferSize());
   1064 }
   1065 
   1066 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
   1067   // Not enough room left in the current buffer: give the caller nothing.
   1068   if (buffer_size_ < size) return NULL;
   1069 
   1070   // Hand out the current position, then step past the reserved bytes.
   1071   uint8* const reserved = buffer_;
   1072   Advance(size);
   1073   return reserved;
   1074 }
   1075 
   1076 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
   1077                                                       uint8* target) {
   1078   // Emit seven bits per byte, low bits first; every byte except the last
   1079   // carries the 0x80 continuation flag.
   1080   for (; value >= 0x80; value >>= 7) {
   1081     *target++ = static_cast<uint8>(value | 0x80);
   1082   }
   1083   *target++ = static_cast<uint8>(value);
   1084   return target;
   1085 }
   1086 
   1087 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
   1088   if (value >= 0) {
   1089     WriteVarint32(static_cast<uint32>(value));
   1090     return;
   1091   }
   1092   WriteVarint64(static_cast<uint64>(value));  // Sign-extends to 64 bits.
   1093 }
   1094 
   1095 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
   1096     int32 value, uint8* target) {
   1097   // Negative values are widened (sign-extended) and written as 64-bit
   1098   // varints; everything else takes the 32-bit writer.
   1099   return (value < 0)
   1100              ? WriteVarint64ToArray(static_cast<uint64>(value), target)
   1101              : WriteVarint32ToArray(static_cast<uint32>(value), target);
   1102 }
   1103 
   1104 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
   1105                                                             uint8* target) {
   1106   // Stores value as 4 little-endian bytes; returns the byte after them.
   1107 #if defined(PROTOBUF_LITTLE_ENDIAN)
   1108   memcpy(target, &value, sizeof(value));
   1109 #else
   1110   for (int i = 0; i < 4; ++i) {
   1111     target[i] = static_cast<uint8>(value >> (8 * i));
   1112   }
   1113 #endif
   1114   return target + sizeof(value);
   1115 }
   1116 
   1117 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
   1118                                                             uint8* target) {
   1119   // Stores value as 8 little-endian bytes and returns a pointer to the byte
   1120   // just past them.
   1121 #if defined(PROTOBUF_LITTLE_ENDIAN)
   1122   memcpy(target, &value, sizeof(value));
   1123 #else
   1124   // Split into 32-bit halves first so the shifts below stay 32-bit wide.
   1125   const uint32 halves[2] = {
   1126     static_cast<uint32>(value),
   1127     static_cast<uint32>(value >> 32),
   1128   };
   1129   for (int i = 0; i < 8; ++i) {
   1130     const uint32 half = halves[i / 4];
   1131     target[i] = static_cast<uint8>(half >> (8 * (i % 4)));
   1132   }
   1133 #endif
   1134   return target + sizeof(value);
   1135 }
   1136 
   1137 inline void CodedOutputStream::WriteVarint32(uint32 value) {
   1138   if (buffer_size_ < 5) {
   1139     // Fewer than five bytes of room, so the write might cross the end of
   1140     // the buffer: take the slow path.
   1141     WriteVarint32SlowPath(value);
   1142     return;
   1143   }
   1144   // Fast path: with at least five bytes left this write won't cross the
   1145   // end of the buffer, so the checks can be skipped.
   1146   uint8* const past_end = WriteVarint32ToArray(value, buffer_);
   1147   Advance(static_cast<int>(past_end - buffer_));
   1148 }
   1149 
   1150 inline void CodedOutputStream::WriteTag(uint32 value) {
   1151   WriteVarint32(value);  // A tag is written as a plain 32-bit varint.
   1152 }
   1153 
   1154 inline uint8* CodedOutputStream::WriteTagToArray(
   1155     uint32 value, uint8* target) {
   1156   return WriteVarint32ToArray(value, target);  // Same bytes as a varint32.
   1157 }
   1158 
   1159 inline int CodedOutputStream::VarintSize32(uint32 value) {
   1160   // Values below 2^7 fit in one byte; the out-of-line helper sizes the
   1161   // rest.
   1162   const bool fits_in_one_byte = value < (1 << 7);
   1163   if (fits_in_one_byte) return 1;
   1164   return VarintSize32Fallback(value);
   1165 }
   1166 
   1167 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
   1168   if (value >= 0) {
   1169     return VarintSize32(static_cast<uint32>(value));
   1170   }
   1171   // Negative values are sign-extended and always occupy 10 bytes.
   1172   return 10;  // TODO(kenton):  Make this a symbolic constant.
   1173 }
   1174 
   1175 inline void CodedOutputStream::WriteString(const string& str) {
   1176   WriteRaw(str.data(), static_cast<int>(str.size()));  // No length prefix.
   1177 }
   1178 
   1179 inline void CodedOutputStream::WriteRawMaybeAliased(
   1180     const void* data, int size) {
   1181   if (!aliasing_enabled_) {
   1182     WriteRaw(data, size);  // Aliasing is off: copy the bytes.
   1183     return;
   1184   }
   1185   WriteAliasedRaw(data, size);
   1186 }
   1187 
   1188 inline uint8* CodedOutputStream::WriteStringToArray(
   1189     const string& str, uint8* target) {  // Raw bytes of str only.
   1190   return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
   1191 }
   1192 
   1193 inline int CodedOutputStream::ByteCount() const {
   1194   return total_bytes_ - buffer_size_;  // Buffer bytes seen minus unused room.
   1195 }
   1196 
   1197 inline void CodedInputStream::Advance(int amount) {
   1198   buffer_ += amount;  // Moves the read position; no bounds check here.
   1199 }
   1200 
   1201 inline void CodedOutputStream::Advance(int amount) {
   1202   buffer_size_ -= amount;  // Less room remains...
   1203   buffer_ += amount;       // ...and the write position moves forward.
   1204 }
   1205 
   1206 inline void CodedInputStream::SetRecursionLimit(int limit) {
   1207   recursion_budget_ += limit - recursion_limit_;  // Shift by the limit change.
   1208   recursion_limit_ = limit;
   1209 }
   1210 
   1211 inline bool CodedInputStream::IncrementRecursionDepth() {
   1212   // Spend one level; a negative budget means the limit was exceeded.
   1213   return --recursion_budget_ >= 0;
   1214 }
   1215 
   1216 inline void CodedInputStream::DecrementRecursionDepth() {
   1217   if (recursion_budget_ < recursion_limit_) ++recursion_budget_;  // Capped.
   1218 }
   1219 
   1220 inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
   1221   assert(recursion_budget_ < recursion_limit_);  // Only an assert guards this.
   1222   ++recursion_budget_;  // Unconditional, unlike DecrementRecursionDepth().
   1223 }
   1224 
   1225 inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
   1226                                                    MessageFactory* factory) {
   1227   extension_factory_ = factory;  // Returned by GetExtensionFactory().
   1228   extension_pool_ = pool;        // Returned by GetExtensionPool().
   1229 }
   1230 
   1231 inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
   1232   return extension_pool_;  // Whatever SetExtensionRegistry() stored.
   1233 }
   1234 
   1235 inline MessageFactory* CodedInputStream::GetExtensionFactory() {
   1236   return extension_factory_;  // Whatever SetExtensionRegistry() stored.
   1237 }
   1238 
   1239 inline int CodedInputStream::BufferSize() const {
   1240   return static_cast<int>(buffer_end_ - buffer_);  // Bytes left in the buffer.
   1241 }
   1242 
   1243 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
   1244   : buffer_(NULL),  // No buffer yet; Refresh() below requests the first one.
   1245     buffer_end_(NULL),
   1246     input_(input),
   1247     total_bytes_read_(0),
   1248     overflow_bytes_(0),
   1249     last_tag_(0),
   1250     legitimate_message_end_(false),
   1251     aliasing_enabled_(false),
   1252     current_limit_(kint32max),  // Presumably means "no limit"; confirm.
   1253     buffer_size_after_limit_(0),
   1254     total_bytes_limit_(kDefaultTotalBytesLimit),  // 64MB
   1255     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),  // 32MB
   1256     recursion_budget_(default_recursion_limit_),  // Full budget to start with.
   1257     recursion_limit_(default_recursion_limit_),
   1258     extension_pool_(NULL),
   1259     extension_factory_(NULL) {
   1260   // Eagerly Refresh() so buffer space is immediately available.
   1261   Refresh();  // The bool result is not checked here.
   1262 }
   1263 
   1264 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
   1265   : buffer_(buffer),  // Read directly from the caller's array.
   1266     buffer_end_(buffer + size),
   1267     input_(NULL),  // No underlying stream; IsFlat() tests for this.
   1268     total_bytes_read_(size),  // The whole array is counted up front.
   1269     overflow_bytes_(0),
   1270     last_tag_(0),
   1271     legitimate_message_end_(false),
   1272     aliasing_enabled_(false),
   1273     current_limit_(size),  // See the note at the end of this constructor.
   1274     buffer_size_after_limit_(0),
   1275     total_bytes_limit_(kDefaultTotalBytesLimit),  // 64MB
   1276     total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),  // 32MB
   1277     recursion_budget_(default_recursion_limit_),  // Full budget to start with.
   1278     recursion_limit_(default_recursion_limit_),
   1279     extension_pool_(NULL),
   1280     extension_factory_(NULL) {
   1281   // Note that setting current_limit_ == size is important to prevent some
   1282   // code paths from trying to access input_ and segfaulting.
   1283 }
   1284 
   1285 inline bool CodedInputStream::IsFlat() const {
   1286   return input_ == NULL;  // The array constructor leaves input_ NULL.
   1287 }
   1288 
   1289 }  // namespace io
   1290 }  // namespace protobuf
   1291 
   1292 
   1293 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
   1294   #pragma runtime_checks("c", restore)
   1295 #endif  // _MSC_VER && !defined(__INTEL_COMPILER)
   1296 
   1297 }  // namespace google
   1298 #endif  // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
   1299