Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 
     35 #include <stack>
     36 #include <string>
     37 #include <vector>
     38 
     39 #include <google/protobuf/wire_format.h>
     40 
     41 #include <google/protobuf/stubs/common.h>
     42 #include <google/protobuf/descriptor.h>
     43 #include <google/protobuf/wire_format_lite_inl.h>
     44 #include <google/protobuf/descriptor.pb.h>
     45 #include <google/protobuf/io/coded_stream.h>
     46 #include <google/protobuf/io/zero_copy_stream.h>
     47 #include <google/protobuf/io/zero_copy_stream_impl.h>
     48 #include <google/protobuf/unknown_field_set.h>
     49 
     50 
     51 namespace google {
     52 namespace protobuf {
     53 namespace internal {
     54 
     55 using internal::WireFormatLite;
     56 
     57 namespace {
     58 
     59 // This function turns out to be convenient when using some macros later.
     60 inline int GetEnumNumber(const EnumValueDescriptor* descriptor) {
     61   return descriptor->number();
     62 }
     63 
     64 }  // anonymous namespace
     65 
     66 // ===================================================================
     67 
     68 bool UnknownFieldSetFieldSkipper::SkipField(
     69     io::CodedInputStream* input, uint32 tag) {
     70   return WireFormat::SkipField(input, tag, unknown_fields_);
     71 }
     72 
     73 bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) {
     74   return WireFormat::SkipMessage(input, unknown_fields_);
     75 }
     76 
     77 void UnknownFieldSetFieldSkipper::SkipUnknownEnum(
     78     int field_number, int value) {
     79   unknown_fields_->AddVarint(field_number, value);
     80 }
     81 
     82 bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag,
     83                            UnknownFieldSet* unknown_fields) {
     84   int number = WireFormatLite::GetTagFieldNumber(tag);
     85 
     86   switch (WireFormatLite::GetTagWireType(tag)) {
     87     case WireFormatLite::WIRETYPE_VARINT: {
     88       uint64 value;
     89       if (!input->ReadVarint64(&value)) return false;
     90       if (unknown_fields != NULL) unknown_fields->AddVarint(number, value);
     91       return true;
     92     }
     93     case WireFormatLite::WIRETYPE_FIXED64: {
     94       uint64 value;
     95       if (!input->ReadLittleEndian64(&value)) return false;
     96       if (unknown_fields != NULL) unknown_fields->AddFixed64(number, value);
     97       return true;
     98     }
     99     case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
    100       uint32 length;
    101       if (!input->ReadVarint32(&length)) return false;
    102       if (unknown_fields == NULL) {
    103         if (!input->Skip(length)) return false;
    104       } else {
    105         if (!input->ReadString(unknown_fields->AddLengthDelimited(number),
    106                                length)) {
    107           return false;
    108         }
    109       }
    110       return true;
    111     }
    112     case WireFormatLite::WIRETYPE_START_GROUP: {
    113       if (!input->IncrementRecursionDepth()) return false;
    114       if (!SkipMessage(input, (unknown_fields == NULL) ?
    115                               NULL : unknown_fields->AddGroup(number))) {
    116         return false;
    117       }
    118       input->DecrementRecursionDepth();
    119       // Check that the ending tag matched the starting tag.
    120       if (!input->LastTagWas(WireFormatLite::MakeTag(
    121           WireFormatLite::GetTagFieldNumber(tag),
    122           WireFormatLite::WIRETYPE_END_GROUP))) {
    123         return false;
    124       }
    125       return true;
    126     }
    127     case WireFormatLite::WIRETYPE_END_GROUP: {
    128       return false;
    129     }
    130     case WireFormatLite::WIRETYPE_FIXED32: {
    131       uint32 value;
    132       if (!input->ReadLittleEndian32(&value)) return false;
    133       if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value);
    134       return true;
    135     }
    136     default: {
    137       return false;
    138     }
    139   }
    140 }
    141 
    142 bool WireFormat::SkipMessage(io::CodedInputStream* input,
    143                              UnknownFieldSet* unknown_fields) {
    144   while(true) {
    145     uint32 tag = input->ReadTag();
    146     if (tag == 0) {
    147       // End of input.  This is a valid place to end, so return true.
    148       return true;
    149     }
    150 
    151     WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag);
    152 
    153     if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) {
    154       // Must be the end of the message.
    155       return true;
    156     }
    157 
    158     if (!SkipField(input, tag, unknown_fields)) return false;
    159   }
    160 }
    161 
    162 void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields,
    163                                         io::CodedOutputStream* output) {
    164   for (int i = 0; i < unknown_fields.field_count(); i++) {
    165     const UnknownField& field = unknown_fields.field(i);
    166     switch (field.type()) {
    167       case UnknownField::TYPE_VARINT:
    168         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    169             WireFormatLite::WIRETYPE_VARINT));
    170         output->WriteVarint64(field.varint());
    171         break;
    172       case UnknownField::TYPE_FIXED32:
    173         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    174             WireFormatLite::WIRETYPE_FIXED32));
    175         output->WriteLittleEndian32(field.fixed32());
    176         break;
    177       case UnknownField::TYPE_FIXED64:
    178         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    179             WireFormatLite::WIRETYPE_FIXED64));
    180         output->WriteLittleEndian64(field.fixed64());
    181         break;
    182       case UnknownField::TYPE_LENGTH_DELIMITED:
    183         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    184             WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
    185         output->WriteVarint32(field.length_delimited().size());
    186         output->WriteString(field.length_delimited());
    187         break;
    188       case UnknownField::TYPE_GROUP:
    189         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    190             WireFormatLite::WIRETYPE_START_GROUP));
    191         SerializeUnknownFields(field.group(), output);
    192         output->WriteVarint32(WireFormatLite::MakeTag(field.number(),
    193             WireFormatLite::WIRETYPE_END_GROUP));
    194         break;
    195     }
    196   }
    197 }
    198 
    199 uint8* WireFormat::SerializeUnknownFieldsToArray(
    200     const UnknownFieldSet& unknown_fields,
    201     uint8* target) {
    202   for (int i = 0; i < unknown_fields.field_count(); i++) {
    203     const UnknownField& field = unknown_fields.field(i);
    204 
    205     switch (field.type()) {
    206       case UnknownField::TYPE_VARINT:
    207         target = WireFormatLite::WriteInt64ToArray(
    208             field.number(), field.varint(), target);
    209         break;
    210       case UnknownField::TYPE_FIXED32:
    211         target = WireFormatLite::WriteFixed32ToArray(
    212             field.number(), field.fixed32(), target);
    213         break;
    214       case UnknownField::TYPE_FIXED64:
    215         target = WireFormatLite::WriteFixed64ToArray(
    216             field.number(), field.fixed64(), target);
    217         break;
    218       case UnknownField::TYPE_LENGTH_DELIMITED:
    219         target = WireFormatLite::WriteBytesToArray(
    220             field.number(), field.length_delimited(), target);
    221         break;
    222       case UnknownField::TYPE_GROUP:
    223         target = WireFormatLite::WriteTagToArray(
    224             field.number(), WireFormatLite::WIRETYPE_START_GROUP, target);
    225         target = SerializeUnknownFieldsToArray(field.group(), target);
    226         target = WireFormatLite::WriteTagToArray(
    227             field.number(), WireFormatLite::WIRETYPE_END_GROUP, target);
    228         break;
    229     }
    230   }
    231   return target;
    232 }
    233 
    234 void WireFormat::SerializeUnknownMessageSetItems(
    235     const UnknownFieldSet& unknown_fields,
    236     io::CodedOutputStream* output) {
    237   for (int i = 0; i < unknown_fields.field_count(); i++) {
    238     const UnknownField& field = unknown_fields.field(i);
    239     // The only unknown fields that are allowed to exist in a MessageSet are
    240     // messages, which are length-delimited.
    241     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
    242       const string& data = field.length_delimited();
    243 
    244       // Start group.
    245       output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
    246 
    247       // Write type ID.
    248       output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
    249       output->WriteVarint32(field.number());
    250 
    251       // Write message.
    252       output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
    253       output->WriteVarint32(data.size());
    254       output->WriteString(data);
    255 
    256       // End group.
    257       output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
    258     }
    259   }
    260 }
    261 
    262 uint8* WireFormat::SerializeUnknownMessageSetItemsToArray(
    263     const UnknownFieldSet& unknown_fields,
    264     uint8* target) {
    265   for (int i = 0; i < unknown_fields.field_count(); i++) {
    266     const UnknownField& field = unknown_fields.field(i);
    267 
    268     // The only unknown fields that are allowed to exist in a MessageSet are
    269     // messages, which are length-delimited.
    270     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
    271       const string& data = field.length_delimited();
    272 
    273       // Start group.
    274       target = io::CodedOutputStream::WriteTagToArray(
    275           WireFormatLite::kMessageSetItemStartTag, target);
    276 
    277       // Write type ID.
    278       target = io::CodedOutputStream::WriteTagToArray(
    279           WireFormatLite::kMessageSetTypeIdTag, target);
    280       target = io::CodedOutputStream::WriteVarint32ToArray(
    281           field.number(), target);
    282 
    283       // Write message.
    284       target = io::CodedOutputStream::WriteTagToArray(
    285           WireFormatLite::kMessageSetMessageTag, target);
    286       target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target);
    287       target = io::CodedOutputStream::WriteStringToArray(data, target);
    288 
    289       // End group.
    290       target = io::CodedOutputStream::WriteTagToArray(
    291           WireFormatLite::kMessageSetItemEndTag, target);
    292     }
    293   }
    294 
    295   return target;
    296 }
    297 
    298 int WireFormat::ComputeUnknownFieldsSize(
    299     const UnknownFieldSet& unknown_fields) {
    300   int size = 0;
    301   for (int i = 0; i < unknown_fields.field_count(); i++) {
    302     const UnknownField& field = unknown_fields.field(i);
    303 
    304     switch (field.type()) {
    305       case UnknownField::TYPE_VARINT:
    306         size += io::CodedOutputStream::VarintSize32(
    307             WireFormatLite::MakeTag(field.number(),
    308             WireFormatLite::WIRETYPE_VARINT));
    309         size += io::CodedOutputStream::VarintSize64(field.varint());
    310         break;
    311       case UnknownField::TYPE_FIXED32:
    312         size += io::CodedOutputStream::VarintSize32(
    313             WireFormatLite::MakeTag(field.number(),
    314             WireFormatLite::WIRETYPE_FIXED32));
    315         size += sizeof(int32);
    316         break;
    317       case UnknownField::TYPE_FIXED64:
    318         size += io::CodedOutputStream::VarintSize32(
    319             WireFormatLite::MakeTag(field.number(),
    320             WireFormatLite::WIRETYPE_FIXED64));
    321         size += sizeof(int64);
    322         break;
    323       case UnknownField::TYPE_LENGTH_DELIMITED:
    324         size += io::CodedOutputStream::VarintSize32(
    325             WireFormatLite::MakeTag(field.number(),
    326             WireFormatLite::WIRETYPE_LENGTH_DELIMITED));
    327         size += io::CodedOutputStream::VarintSize32(
    328             field.length_delimited().size());
    329         size += field.length_delimited().size();
    330         break;
    331       case UnknownField::TYPE_GROUP:
    332         size += io::CodedOutputStream::VarintSize32(
    333             WireFormatLite::MakeTag(field.number(),
    334             WireFormatLite::WIRETYPE_START_GROUP));
    335         size += ComputeUnknownFieldsSize(field.group());
    336         size += io::CodedOutputStream::VarintSize32(
    337             WireFormatLite::MakeTag(field.number(),
    338             WireFormatLite::WIRETYPE_END_GROUP));
    339         break;
    340     }
    341   }
    342 
    343   return size;
    344 }
    345 
    346 int WireFormat::ComputeUnknownMessageSetItemsSize(
    347     const UnknownFieldSet& unknown_fields) {
    348   int size = 0;
    349   for (int i = 0; i < unknown_fields.field_count(); i++) {
    350     const UnknownField& field = unknown_fields.field(i);
    351 
    352     // The only unknown fields that are allowed to exist in a MessageSet are
    353     // messages, which are length-delimited.
    354     if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) {
    355       size += WireFormatLite::kMessageSetItemTagsSize;
    356       size += io::CodedOutputStream::VarintSize32(field.number());
    357       size += io::CodedOutputStream::VarintSize32(
    358         field.length_delimited().size());
    359       size += field.length_delimited().size();
    360     }
    361   }
    362 
    363   return size;
    364 }
    365 
    366 // ===================================================================
    367 
    368 bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input,
    369                                       Message* message) {
    370   const Descriptor* descriptor = message->GetDescriptor();
    371   const Reflection* message_reflection = message->GetReflection();
    372 
    373   while(true) {
    374     uint32 tag = input->ReadTag();
    375     if (tag == 0) {
    376       // End of input.  This is a valid place to end, so return true.
    377       return true;
    378     }
    379 
    380     if (WireFormatLite::GetTagWireType(tag) ==
    381         WireFormatLite::WIRETYPE_END_GROUP) {
    382       // Must be the end of the message.
    383       return true;
    384     }
    385 
    386     const FieldDescriptor* field = NULL;
    387 
    388     if (descriptor != NULL) {
    389       int field_number = WireFormatLite::GetTagFieldNumber(tag);
    390       field = descriptor->FindFieldByNumber(field_number);
    391 
    392       // If that failed, check if the field is an extension.
    393       if (field == NULL && descriptor->IsExtensionNumber(field_number)) {
    394         if (input->GetExtensionPool() == NULL) {
    395           field = message_reflection->FindKnownExtensionByNumber(field_number);
    396         } else {
    397           field = input->GetExtensionPool()
    398                        ->FindExtensionByNumber(descriptor, field_number);
    399         }
    400       }
    401 
    402       // If that failed, but we're a MessageSet, and this is the tag for a
    403       // MessageSet item, then parse that.
    404       if (field == NULL &&
    405           descriptor->options().message_set_wire_format() &&
    406           tag == WireFormatLite::kMessageSetItemStartTag) {
    407         if (!ParseAndMergeMessageSetItem(input, message)) {
    408           return false;
    409         }
    410         continue;  // Skip ParseAndMergeField(); already taken care of.
    411       }
    412     }
    413 
    414     if (!ParseAndMergeField(tag, field, message, input)) {
    415       return false;
    416     }
    417   }
    418 }
    419 
    420 bool WireFormat::ParseAndMergeField(
    421     uint32 tag,
    422     const FieldDescriptor* field,        // May be NULL for unknown
    423     Message* message,
    424     io::CodedInputStream* input) {
    425   const Reflection* message_reflection = message->GetReflection();
    426 
    427   enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format;
    428 
    429   if (field == NULL) {
    430     value_format = UNKNOWN;
    431   } else if (WireFormatLite::GetTagWireType(tag) ==
    432              WireTypeForFieldType(field->type())) {
    433     value_format = NORMAL_FORMAT;
    434   } else if (field->is_packable() &&
    435              WireFormatLite::GetTagWireType(tag) ==
    436              WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
    437     value_format = PACKED_FORMAT;
    438   } else {
    439     // We don't recognize this field. Either the field number is unknown
    440     // or the wire type doesn't match. Put it in our unknown field set.
    441     value_format = UNKNOWN;
    442   }
    443 
    444   if (value_format == UNKNOWN) {
    445     return SkipField(input, tag,
    446                      message_reflection->MutableUnknownFields(message));
    447   } else if (value_format == PACKED_FORMAT) {
    448     uint32 length;
    449     if (!input->ReadVarint32(&length)) return false;
    450     io::CodedInputStream::Limit limit = input->PushLimit(length);
    451 
    452     switch (field->type()) {
    453 #define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                      \
    454       case FieldDescriptor::TYPE_##TYPE: {                                     \
    455         while (input->BytesUntilLimit() > 0) {                                 \
    456           CPPTYPE value;                                                       \
    457           if (!WireFormatLite::ReadPrimitive<                                  \
    458                 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))          \
    459             return false;                                                      \
    460           message_reflection->Add##CPPTYPE_METHOD(message, field, value);      \
    461         }                                                                      \
    462         break;                                                                 \
    463       }
    464 
    465       HANDLE_PACKED_TYPE( INT32,  int32,  Int32)
    466       HANDLE_PACKED_TYPE( INT64,  int64,  Int64)
    467       HANDLE_PACKED_TYPE(SINT32,  int32,  Int32)
    468       HANDLE_PACKED_TYPE(SINT64,  int64,  Int64)
    469       HANDLE_PACKED_TYPE(UINT32, uint32, UInt32)
    470       HANDLE_PACKED_TYPE(UINT64, uint64, UInt64)
    471 
    472       HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32)
    473       HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64)
    474       HANDLE_PACKED_TYPE(SFIXED32,  int32,  Int32)
    475       HANDLE_PACKED_TYPE(SFIXED64,  int64,  Int64)
    476 
    477       HANDLE_PACKED_TYPE(FLOAT , float , Float )
    478       HANDLE_PACKED_TYPE(DOUBLE, double, Double)
    479 
    480       HANDLE_PACKED_TYPE(BOOL, bool, Bool)
    481 #undef HANDLE_PACKED_TYPE
    482 
    483       case FieldDescriptor::TYPE_ENUM: {
    484         while (input->BytesUntilLimit() > 0) {
    485           int value;
    486           if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
    487                   input, &value)) return false;
    488           const EnumValueDescriptor* enum_value =
    489               field->enum_type()->FindValueByNumber(value);
    490           if (enum_value != NULL) {
    491             message_reflection->AddEnum(message, field, enum_value);
    492           }
    493         }
    494 
    495         break;
    496       }
    497 
    498       case FieldDescriptor::TYPE_STRING:
    499       case FieldDescriptor::TYPE_GROUP:
    500       case FieldDescriptor::TYPE_MESSAGE:
    501       case FieldDescriptor::TYPE_BYTES:
    502         // Can't have packed fields of these types: these should be caught by
    503         // the protocol compiler.
    504         return false;
    505         break;
    506     }
    507 
    508     input->PopLimit(limit);
    509   } else {
    510     // Non-packed value (value_format == NORMAL_FORMAT)
    511     switch (field->type()) {
    512 #define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD)                            \
    513       case FieldDescriptor::TYPE_##TYPE: {                                    \
    514         CPPTYPE value;                                                        \
    515         if (!WireFormatLite::ReadPrimitive<                                   \
    516                 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value))         \
    517           return false;                                                       \
    518         if (field->is_repeated()) {                                           \
    519           message_reflection->Add##CPPTYPE_METHOD(message, field, value);     \
    520         } else {                                                              \
    521           message_reflection->Set##CPPTYPE_METHOD(message, field, value);     \
    522         }                                                                     \
    523         break;                                                                \
    524       }
    525 
    526       HANDLE_TYPE( INT32,  int32,  Int32)
    527       HANDLE_TYPE( INT64,  int64,  Int64)
    528       HANDLE_TYPE(SINT32,  int32,  Int32)
    529       HANDLE_TYPE(SINT64,  int64,  Int64)
    530       HANDLE_TYPE(UINT32, uint32, UInt32)
    531       HANDLE_TYPE(UINT64, uint64, UInt64)
    532 
    533       HANDLE_TYPE( FIXED32, uint32, UInt32)
    534       HANDLE_TYPE( FIXED64, uint64, UInt64)
    535       HANDLE_TYPE(SFIXED32,  int32,  Int32)
    536       HANDLE_TYPE(SFIXED64,  int64,  Int64)
    537 
    538       HANDLE_TYPE(FLOAT , float , Float )
    539       HANDLE_TYPE(DOUBLE, double, Double)
    540 
    541       HANDLE_TYPE(BOOL, bool, Bool)
    542 #undef HANDLE_TYPE
    543 
    544       case FieldDescriptor::TYPE_ENUM: {
    545         int value;
    546         if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>(
    547                 input, &value)) return false;
    548         const EnumValueDescriptor* enum_value =
    549           field->enum_type()->FindValueByNumber(value);
    550         if (enum_value != NULL) {
    551           if (field->is_repeated()) {
    552             message_reflection->AddEnum(message, field, enum_value);
    553           } else {
    554             message_reflection->SetEnum(message, field, enum_value);
    555           }
    556         } else {
    557           // The enum value is not one of the known values.  Add it to the
    558           // UnknownFieldSet.
    559           int64 sign_extended_value = static_cast<int64>(value);
    560           message_reflection->MutableUnknownFields(message)
    561                             ->AddVarint(WireFormatLite::GetTagFieldNumber(tag),
    562                                         sign_extended_value);
    563         }
    564         break;
    565       }
    566 
    567       // Handle strings separately so that we can optimize the ctype=CORD case.
    568       case FieldDescriptor::TYPE_STRING: {
    569         string value;
    570         if (!WireFormatLite::ReadString(input, &value)) return false;
    571         VerifyUTF8String(value.data(), value.length(), PARSE);
    572         if (field->is_repeated()) {
    573           message_reflection->AddString(message, field, value);
    574         } else {
    575           message_reflection->SetString(message, field, value);
    576         }
    577         break;
    578       }
    579 
    580       case FieldDescriptor::TYPE_BYTES: {
    581         string value;
    582         if (!WireFormatLite::ReadBytes(input, &value)) return false;
    583         if (field->is_repeated()) {
    584           message_reflection->AddString(message, field, value);
    585         } else {
    586           message_reflection->SetString(message, field, value);
    587         }
    588         break;
    589       }
    590 
    591       case FieldDescriptor::TYPE_GROUP: {
    592         Message* sub_message;
    593         if (field->is_repeated()) {
    594           sub_message = message_reflection->AddMessage(
    595               message, field, input->GetExtensionFactory());
    596         } else {
    597           sub_message = message_reflection->MutableMessage(
    598               message, field, input->GetExtensionFactory());
    599         }
    600 
    601         if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
    602                                        input, sub_message))
    603           return false;
    604         break;
    605       }
    606 
    607       case FieldDescriptor::TYPE_MESSAGE: {
    608         Message* sub_message;
    609         if (field->is_repeated()) {
    610           sub_message = message_reflection->AddMessage(
    611               message, field, input->GetExtensionFactory());
    612         } else {
    613           sub_message = message_reflection->MutableMessage(
    614               message, field, input->GetExtensionFactory());
    615         }
    616 
    617         if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
    618         break;
    619       }
    620     }
    621   }
    622 
    623   return true;
    624 }
    625 
    626 bool WireFormat::ParseAndMergeMessageSetItem(
    627     io::CodedInputStream* input,
    628     Message* message) {
    629   const Reflection* message_reflection = message->GetReflection();
    630 
    631   // This method parses a group which should contain two fields:
    632   //   required int32 type_id = 2;
    633   //   required data message = 3;
    634 
    635   // Once we see a type_id, we'll construct a fake tag for this extension
    636   // which is the tag it would have had under the proto2 extensions wire
    637   // format.
    638   uint32 fake_tag = 0;
    639 
    640   // Once we see a type_id, we'll look up the FieldDescriptor for the
    641   // extension.
    642   const FieldDescriptor* field = NULL;
    643 
    644   // If we see message data before the type_id, we'll append it to this so
    645   // we can parse it later.  This will probably never happen in practice,
    646   // as no MessageSet encoder I know of writes the message before the type ID.
    647   // But, it's technically valid so we should allow it.
    648   // TODO(kenton):  Use a Cord instead?  Do I care?
    649   string message_data;
    650 
    651   while (true) {
    652     uint32 tag = input->ReadTag();
    653     if (tag == 0) return false;
    654 
    655     switch (tag) {
    656       case WireFormatLite::kMessageSetTypeIdTag: {
    657         uint32 type_id;
    658         if (!input->ReadVarint32(&type_id)) return false;
    659         fake_tag = WireFormatLite::MakeTag(
    660             type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
    661         field = message_reflection->FindKnownExtensionByNumber(type_id);
    662 
    663         if (!message_data.empty()) {
    664           // We saw some message data before the type_id.  Have to parse it
    665           // now.
    666           io::ArrayInputStream raw_input(message_data.data(),
    667                                          message_data.size());
    668           io::CodedInputStream sub_input(&raw_input);
    669           if (!ParseAndMergeField(fake_tag, field, message,
    670                                   &sub_input)) {
    671             return false;
    672           }
    673           message_data.clear();
    674         }
    675 
    676         break;
    677       }
    678 
    679       case WireFormatLite::kMessageSetMessageTag: {
    680         if (fake_tag == 0) {
    681           // We haven't seen a type_id yet.  Append this data to message_data.
    682           string temp;
    683           uint32 length;
    684           if (!input->ReadVarint32(&length)) return false;
    685           if (!input->ReadString(&temp, length)) return false;
    686           message_data.append(temp);
    687         } else {
    688           // Already saw type_id, so we can parse this directly.
    689           if (!ParseAndMergeField(fake_tag, field, message, input)) {
    690             return false;
    691           }
    692         }
    693 
    694         break;
    695       }
    696 
    697       case WireFormatLite::kMessageSetItemEndTag: {
    698         return true;
    699       }
    700 
    701       default: {
    702         if (!SkipField(input, tag, NULL)) return false;
    703       }
    704     }
    705   }
    706 }
    707 
    708 // ===================================================================
    709 
    710 void WireFormat::SerializeWithCachedSizes(
    711     const Message& message,
    712     int size, io::CodedOutputStream* output) {
    713   const Descriptor* descriptor = message.GetDescriptor();
    714   const Reflection* message_reflection = message.GetReflection();
    715   int expected_endpoint = output->ByteCount() + size;
    716 
    717   vector<const FieldDescriptor*> fields;
    718   message_reflection->ListFields(message, &fields);
    719   for (int i = 0; i < fields.size(); i++) {
    720     SerializeFieldWithCachedSizes(fields[i], message, output);
    721   }
    722 
    723   if (descriptor->options().message_set_wire_format()) {
    724     SerializeUnknownMessageSetItems(
    725         message_reflection->GetUnknownFields(message), output);
    726   } else {
    727     SerializeUnknownFields(
    728         message_reflection->GetUnknownFields(message), output);
    729   }
    730 
    731   GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint)
    732     << ": Protocol message serialized to a size different from what was "
    733        "originally expected.  Perhaps it was modified by another thread "
    734        "during serialization?";
    735 }
    736 
    737 void WireFormat::SerializeFieldWithCachedSizes(
    738     const FieldDescriptor* field,
    739     const Message& message,
    740     io::CodedOutputStream* output) {
    741   const Reflection* message_reflection = message.GetReflection();
    742 
    743   if (field->is_extension() &&
    744       field->containing_type()->options().message_set_wire_format() &&
    745       field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
    746       !field->is_repeated()) {
    747     SerializeMessageSetItemWithCachedSizes(field, message, output);
    748     return;
    749   }
    750 
    751   int count = 0;
    752 
    753   if (field->is_repeated()) {
    754     count = message_reflection->FieldSize(message, field);
    755   } else if (message_reflection->HasField(message, field)) {
    756     count = 1;
    757   }
    758 
    759   const bool is_packed = field->options().packed();
    760   if (is_packed && count > 0) {
    761     WireFormatLite::WriteTag(field->number(),
    762         WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output);
    763     const int data_size = FieldDataOnlyByteSize(field, message);
    764     output->WriteVarint32(data_size);
    765   }
    766 
    767   for (int j = 0; j < count; j++) {
    768     switch (field->type()) {
    769 #define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD)      \
    770       case FieldDescriptor::TYPE_##TYPE: {                                     \
    771         const CPPTYPE value = field->is_repeated() ?                           \
    772                               message_reflection->GetRepeated##CPPTYPE_METHOD( \
    773                                 message, field, j) :                           \
    774                               message_reflection->Get##CPPTYPE_METHOD(         \
    775                                 message, field);                               \
    776         if (is_packed) {                                                       \
    777           WireFormatLite::Write##TYPE_METHOD##NoTag(value, output);            \
    778         } else {                                                               \
    779           WireFormatLite::Write##TYPE_METHOD(field->number(), value, output);  \
    780         }                                                                      \
    781         break;                                                                 \
    782       }
    783 
    784       HANDLE_PRIMITIVE_TYPE( INT32,  int32,  Int32,  Int32)
    785       HANDLE_PRIMITIVE_TYPE( INT64,  int64,  Int64,  Int64)
    786       HANDLE_PRIMITIVE_TYPE(SINT32,  int32, SInt32,  Int32)
    787       HANDLE_PRIMITIVE_TYPE(SINT64,  int64, SInt64,  Int64)
    788       HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32)
    789       HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64)
    790 
    791       HANDLE_PRIMITIVE_TYPE( FIXED32, uint32,  Fixed32, UInt32)
    792       HANDLE_PRIMITIVE_TYPE( FIXED64, uint64,  Fixed64, UInt64)
    793       HANDLE_PRIMITIVE_TYPE(SFIXED32,  int32, SFixed32,  Int32)
    794       HANDLE_PRIMITIVE_TYPE(SFIXED64,  int64, SFixed64,  Int64)
    795 
    796       HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float )
    797       HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double)
    798 
    799       HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool)
    800 #undef HANDLE_PRIMITIVE_TYPE
    801 
    802 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                       \
    803       case FieldDescriptor::TYPE_##TYPE:                                     \
    804         WireFormatLite::Write##TYPE_METHOD(                                  \
    805               field->number(),                                               \
    806               field->is_repeated() ?                                         \
    807                 message_reflection->GetRepeated##CPPTYPE_METHOD(             \
    808                   message, field, j) :                                       \
    809                 message_reflection->Get##CPPTYPE_METHOD(message, field),     \
    810               output);                                                       \
    811         break;
    812 
    813       HANDLE_TYPE(GROUP  , Group  , Message)
    814       HANDLE_TYPE(MESSAGE, Message, Message)
    815 #undef HANDLE_TYPE
    816 
    817       case FieldDescriptor::TYPE_ENUM: {
    818         const EnumValueDescriptor* value = field->is_repeated() ?
    819           message_reflection->GetRepeatedEnum(message, field, j) :
    820           message_reflection->GetEnum(message, field);
    821         if (is_packed) {
    822           WireFormatLite::WriteEnumNoTag(value->number(), output);
    823         } else {
    824           WireFormatLite::WriteEnum(field->number(), value->number(), output);
    825         }
    826         break;
    827       }
    828 
    829       // Handle strings separately so that we can get string references
    830       // instead of copying.
    831       case FieldDescriptor::TYPE_STRING: {
    832         string scratch;
    833         const string& value = field->is_repeated() ?
    834           message_reflection->GetRepeatedStringReference(
    835             message, field, j, &scratch) :
    836           message_reflection->GetStringReference(message, field, &scratch);
    837         VerifyUTF8String(value.data(), value.length(), SERIALIZE);
    838         WireFormatLite::WriteString(field->number(), value, output);
    839         break;
    840       }
    841 
    842       case FieldDescriptor::TYPE_BYTES: {
    843         string scratch;
    844         const string& value = field->is_repeated() ?
    845           message_reflection->GetRepeatedStringReference(
    846             message, field, j, &scratch) :
    847           message_reflection->GetStringReference(message, field, &scratch);
    848         WireFormatLite::WriteBytes(field->number(), value, output);
    849         break;
    850       }
    851     }
    852   }
    853 }
    854 
    855 void WireFormat::SerializeMessageSetItemWithCachedSizes(
    856     const FieldDescriptor* field,
    857     const Message& message,
    858     io::CodedOutputStream* output) {
    859   const Reflection* message_reflection = message.GetReflection();
    860 
    861   // Start group.
    862   output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag);
    863 
    864   // Write type ID.
    865   output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag);
    866   output->WriteVarint32(field->number());
    867 
    868   // Write message.
    869   output->WriteVarint32(WireFormatLite::kMessageSetMessageTag);
    870 
    871   const Message& sub_message = message_reflection->GetMessage(message, field);
    872   output->WriteVarint32(sub_message.GetCachedSize());
    873   sub_message.SerializeWithCachedSizes(output);
    874 
    875   // End group.
    876   output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag);
    877 }
    878 
    879 // ===================================================================
    880 
    881 int WireFormat::ByteSize(const Message& message) {
    882   const Descriptor* descriptor = message.GetDescriptor();
    883   const Reflection* message_reflection = message.GetReflection();
    884 
    885   int our_size = 0;
    886 
    887   vector<const FieldDescriptor*> fields;
    888   message_reflection->ListFields(message, &fields);
    889   for (int i = 0; i < fields.size(); i++) {
    890     our_size += FieldByteSize(fields[i], message);
    891   }
    892 
    893   if (descriptor->options().message_set_wire_format()) {
    894     our_size += ComputeUnknownMessageSetItemsSize(
    895       message_reflection->GetUnknownFields(message));
    896   } else {
    897     our_size += ComputeUnknownFieldsSize(
    898       message_reflection->GetUnknownFields(message));
    899   }
    900 
    901   return our_size;
    902 }
    903 
    904 int WireFormat::FieldByteSize(
    905     const FieldDescriptor* field,
    906     const Message& message) {
    907   const Reflection* message_reflection = message.GetReflection();
    908 
    909   if (field->is_extension() &&
    910       field->containing_type()->options().message_set_wire_format() &&
    911       field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
    912       !field->is_repeated()) {
    913     return MessageSetItemByteSize(field, message);
    914   }
    915 
    916   int count = 0;
    917   if (field->is_repeated()) {
    918     count = message_reflection->FieldSize(message, field);
    919   } else if (message_reflection->HasField(message, field)) {
    920     count = 1;
    921   }
    922 
    923   const int data_size = FieldDataOnlyByteSize(field, message);
    924   int our_size = data_size;
    925   if (field->options().packed()) {
    926     if (data_size > 0) {
    927       // Packed fields get serialized like a string, not their native type.
    928       // Technically this doesn't really matter; the size only changes if it's
    929       // a GROUP
    930       our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING);
    931       our_size += io::CodedOutputStream::VarintSize32(data_size);
    932     }
    933   } else {
    934     our_size += count * TagSize(field->number(), field->type());
    935   }
    936   return our_size;
    937 }
    938 
    939 int WireFormat::FieldDataOnlyByteSize(
    940     const FieldDescriptor* field,
    941     const Message& message) {
    942   const Reflection* message_reflection = message.GetReflection();
    943 
    944   int count = 0;
    945   if (field->is_repeated()) {
    946     count = message_reflection->FieldSize(message, field);
    947   } else if (message_reflection->HasField(message, field)) {
    948     count = 1;
    949   }
    950 
    951   int data_size = 0;
    952   switch (field->type()) {
    953 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD)                     \
    954     case FieldDescriptor::TYPE_##TYPE:                                     \
    955       if (field->is_repeated()) {                                          \
    956         for (int j = 0; j < count; j++) {                                  \
    957           data_size += WireFormatLite::TYPE_METHOD##Size(                  \
    958             message_reflection->GetRepeated##CPPTYPE_METHOD(               \
    959               message, field, j));                                         \
    960         }                                                                  \
    961       } else {                                                             \
    962         data_size += WireFormatLite::TYPE_METHOD##Size(                    \
    963           message_reflection->Get##CPPTYPE_METHOD(message, field));        \
    964       }                                                                    \
    965       break;
    966 
    967 #define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD)                               \
    968     case FieldDescriptor::TYPE_##TYPE:                                     \
    969       data_size += count * WireFormatLite::k##TYPE_METHOD##Size;           \
    970       break;
    971 
    972     HANDLE_TYPE( INT32,  Int32,  Int32)
    973     HANDLE_TYPE( INT64,  Int64,  Int64)
    974     HANDLE_TYPE(SINT32, SInt32,  Int32)
    975     HANDLE_TYPE(SINT64, SInt64,  Int64)
    976     HANDLE_TYPE(UINT32, UInt32, UInt32)
    977     HANDLE_TYPE(UINT64, UInt64, UInt64)
    978 
    979     HANDLE_FIXED_TYPE( FIXED32,  Fixed32)
    980     HANDLE_FIXED_TYPE( FIXED64,  Fixed64)
    981     HANDLE_FIXED_TYPE(SFIXED32, SFixed32)
    982     HANDLE_FIXED_TYPE(SFIXED64, SFixed64)
    983 
    984     HANDLE_FIXED_TYPE(FLOAT , Float )
    985     HANDLE_FIXED_TYPE(DOUBLE, Double)
    986 
    987     HANDLE_FIXED_TYPE(BOOL, Bool)
    988 
    989     HANDLE_TYPE(GROUP  , Group  , Message)
    990     HANDLE_TYPE(MESSAGE, Message, Message)
    991 #undef HANDLE_TYPE
    992 #undef HANDLE_FIXED_TYPE
    993 
    994     case FieldDescriptor::TYPE_ENUM: {
    995       if (field->is_repeated()) {
    996         for (int j = 0; j < count; j++) {
    997           data_size += WireFormatLite::EnumSize(
    998             message_reflection->GetRepeatedEnum(message, field, j)->number());
    999         }
   1000       } else {
   1001         data_size += WireFormatLite::EnumSize(
   1002           message_reflection->GetEnum(message, field)->number());
   1003       }
   1004       break;
   1005     }
   1006 
   1007     // Handle strings separately so that we can get string references
   1008     // instead of copying.
   1009     case FieldDescriptor::TYPE_STRING:
   1010     case FieldDescriptor::TYPE_BYTES: {
   1011       for (int j = 0; j < count; j++) {
   1012         string scratch;
   1013         const string& value = field->is_repeated() ?
   1014           message_reflection->GetRepeatedStringReference(
   1015             message, field, j, &scratch) :
   1016           message_reflection->GetStringReference(message, field, &scratch);
   1017         data_size += WireFormatLite::StringSize(value);
   1018       }
   1019       break;
   1020     }
   1021   }
   1022   return data_size;
   1023 }
   1024 
   1025 int WireFormat::MessageSetItemByteSize(
   1026     const FieldDescriptor* field,
   1027     const Message& message) {
   1028   const Reflection* message_reflection = message.GetReflection();
   1029 
   1030   int our_size = WireFormatLite::kMessageSetItemTagsSize;
   1031 
   1032   // type_id
   1033   our_size += io::CodedOutputStream::VarintSize32(field->number());
   1034 
   1035   // message
   1036   const Message& sub_message = message_reflection->GetMessage(message, field);
   1037   int message_size = sub_message.ByteSize();
   1038 
   1039   our_size += io::CodedOutputStream::VarintSize32(message_size);
   1040   our_size += message_size;
   1041 
   1042   return our_size;
   1043 }
   1044 
   1045 void WireFormat::VerifyUTF8StringFallback(const char* data,
   1046                                           int size,
   1047                                           Operation op) {
   1048   if (!IsStructurallyValidUTF8(data, size)) {
   1049     const char* operation_str = NULL;
   1050     switch (op) {
   1051       case PARSE:
   1052         operation_str = "parsing";
   1053         break;
   1054       case SERIALIZE:
   1055         operation_str = "serializing";
   1056         break;
   1057       // no default case: have the compiler warn if a case is not covered.
   1058     }
   1059     GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while "
   1060                << operation_str
   1061                << " protocol buffer. Strings must contain only UTF-8; "
   1062                   "use the 'bytes' type for raw bytes.";
   1063   }
   1064 }
   1065 
   1066 
   1067 }  // namespace internal
   1068 }  // namespace protobuf
   1069 }  // namespace google
   1070