// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton (at) google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
34 35 #include <stack> 36 #include <string> 37 #include <vector> 38 39 #include <google/protobuf/wire_format.h> 40 41 #include <google/protobuf/stubs/common.h> 42 #include <google/protobuf/descriptor.h> 43 #include <google/protobuf/wire_format_lite_inl.h> 44 #include <google/protobuf/descriptor.pb.h> 45 #include <google/protobuf/io/coded_stream.h> 46 #include <google/protobuf/io/zero_copy_stream.h> 47 #include <google/protobuf/io/zero_copy_stream_impl.h> 48 #include <google/protobuf/unknown_field_set.h> 49 50 51 namespace google { 52 namespace protobuf { 53 namespace internal { 54 55 using internal::WireFormatLite; 56 57 namespace { 58 59 // This function turns out to be convenient when using some macros later. 60 inline int GetEnumNumber(const EnumValueDescriptor* descriptor) { 61 return descriptor->number(); 62 } 63 64 } // anonymous namespace 65 66 // =================================================================== 67 68 bool UnknownFieldSetFieldSkipper::SkipField( 69 io::CodedInputStream* input, uint32 tag) { 70 return WireFormat::SkipField(input, tag, unknown_fields_); 71 } 72 73 bool UnknownFieldSetFieldSkipper::SkipMessage(io::CodedInputStream* input) { 74 return WireFormat::SkipMessage(input, unknown_fields_); 75 } 76 77 void UnknownFieldSetFieldSkipper::SkipUnknownEnum( 78 int field_number, int value) { 79 unknown_fields_->AddVarint(field_number, value); 80 } 81 82 bool WireFormat::SkipField(io::CodedInputStream* input, uint32 tag, 83 UnknownFieldSet* unknown_fields) { 84 int number = WireFormatLite::GetTagFieldNumber(tag); 85 86 switch (WireFormatLite::GetTagWireType(tag)) { 87 case WireFormatLite::WIRETYPE_VARINT: { 88 uint64 value; 89 if (!input->ReadVarint64(&value)) return false; 90 if (unknown_fields != NULL) unknown_fields->AddVarint(number, value); 91 return true; 92 } 93 case WireFormatLite::WIRETYPE_FIXED64: { 94 uint64 value; 95 if (!input->ReadLittleEndian64(&value)) return false; 96 if (unknown_fields != NULL) 
unknown_fields->AddFixed64(number, value); 97 return true; 98 } 99 case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: { 100 uint32 length; 101 if (!input->ReadVarint32(&length)) return false; 102 if (unknown_fields == NULL) { 103 if (!input->Skip(length)) return false; 104 } else { 105 if (!input->ReadString(unknown_fields->AddLengthDelimited(number), 106 length)) { 107 return false; 108 } 109 } 110 return true; 111 } 112 case WireFormatLite::WIRETYPE_START_GROUP: { 113 if (!input->IncrementRecursionDepth()) return false; 114 if (!SkipMessage(input, (unknown_fields == NULL) ? 115 NULL : unknown_fields->AddGroup(number))) { 116 return false; 117 } 118 input->DecrementRecursionDepth(); 119 // Check that the ending tag matched the starting tag. 120 if (!input->LastTagWas(WireFormatLite::MakeTag( 121 WireFormatLite::GetTagFieldNumber(tag), 122 WireFormatLite::WIRETYPE_END_GROUP))) { 123 return false; 124 } 125 return true; 126 } 127 case WireFormatLite::WIRETYPE_END_GROUP: { 128 return false; 129 } 130 case WireFormatLite::WIRETYPE_FIXED32: { 131 uint32 value; 132 if (!input->ReadLittleEndian32(&value)) return false; 133 if (unknown_fields != NULL) unknown_fields->AddFixed32(number, value); 134 return true; 135 } 136 default: { 137 return false; 138 } 139 } 140 } 141 142 bool WireFormat::SkipMessage(io::CodedInputStream* input, 143 UnknownFieldSet* unknown_fields) { 144 while(true) { 145 uint32 tag = input->ReadTag(); 146 if (tag == 0) { 147 // End of input. This is a valid place to end, so return true. 148 return true; 149 } 150 151 WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag); 152 153 if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) { 154 // Must be the end of the message. 
155 return true; 156 } 157 158 if (!SkipField(input, tag, unknown_fields)) return false; 159 } 160 } 161 162 void WireFormat::SerializeUnknownFields(const UnknownFieldSet& unknown_fields, 163 io::CodedOutputStream* output) { 164 for (int i = 0; i < unknown_fields.field_count(); i++) { 165 const UnknownField& field = unknown_fields.field(i); 166 switch (field.type()) { 167 case UnknownField::TYPE_VARINT: 168 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 169 WireFormatLite::WIRETYPE_VARINT)); 170 output->WriteVarint64(field.varint()); 171 break; 172 case UnknownField::TYPE_FIXED32: 173 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 174 WireFormatLite::WIRETYPE_FIXED32)); 175 output->WriteLittleEndian32(field.fixed32()); 176 break; 177 case UnknownField::TYPE_FIXED64: 178 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 179 WireFormatLite::WIRETYPE_FIXED64)); 180 output->WriteLittleEndian64(field.fixed64()); 181 break; 182 case UnknownField::TYPE_LENGTH_DELIMITED: 183 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 184 WireFormatLite::WIRETYPE_LENGTH_DELIMITED)); 185 output->WriteVarint32(field.length_delimited().size()); 186 output->WriteString(field.length_delimited()); 187 break; 188 case UnknownField::TYPE_GROUP: 189 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 190 WireFormatLite::WIRETYPE_START_GROUP)); 191 SerializeUnknownFields(field.group(), output); 192 output->WriteVarint32(WireFormatLite::MakeTag(field.number(), 193 WireFormatLite::WIRETYPE_END_GROUP)); 194 break; 195 } 196 } 197 } 198 199 uint8* WireFormat::SerializeUnknownFieldsToArray( 200 const UnknownFieldSet& unknown_fields, 201 uint8* target) { 202 for (int i = 0; i < unknown_fields.field_count(); i++) { 203 const UnknownField& field = unknown_fields.field(i); 204 205 switch (field.type()) { 206 case UnknownField::TYPE_VARINT: 207 target = WireFormatLite::WriteInt64ToArray( 208 field.number(), field.varint(), target); 209 
break; 210 case UnknownField::TYPE_FIXED32: 211 target = WireFormatLite::WriteFixed32ToArray( 212 field.number(), field.fixed32(), target); 213 break; 214 case UnknownField::TYPE_FIXED64: 215 target = WireFormatLite::WriteFixed64ToArray( 216 field.number(), field.fixed64(), target); 217 break; 218 case UnknownField::TYPE_LENGTH_DELIMITED: 219 target = WireFormatLite::WriteBytesToArray( 220 field.number(), field.length_delimited(), target); 221 break; 222 case UnknownField::TYPE_GROUP: 223 target = WireFormatLite::WriteTagToArray( 224 field.number(), WireFormatLite::WIRETYPE_START_GROUP, target); 225 target = SerializeUnknownFieldsToArray(field.group(), target); 226 target = WireFormatLite::WriteTagToArray( 227 field.number(), WireFormatLite::WIRETYPE_END_GROUP, target); 228 break; 229 } 230 } 231 return target; 232 } 233 234 void WireFormat::SerializeUnknownMessageSetItems( 235 const UnknownFieldSet& unknown_fields, 236 io::CodedOutputStream* output) { 237 for (int i = 0; i < unknown_fields.field_count(); i++) { 238 const UnknownField& field = unknown_fields.field(i); 239 // The only unknown fields that are allowed to exist in a MessageSet are 240 // messages, which are length-delimited. 241 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) { 242 const string& data = field.length_delimited(); 243 244 // Start group. 245 output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag); 246 247 // Write type ID. 248 output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag); 249 output->WriteVarint32(field.number()); 250 251 // Write message. 252 output->WriteVarint32(WireFormatLite::kMessageSetMessageTag); 253 output->WriteVarint32(data.size()); 254 output->WriteString(data); 255 256 // End group. 
257 output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag); 258 } 259 } 260 } 261 262 uint8* WireFormat::SerializeUnknownMessageSetItemsToArray( 263 const UnknownFieldSet& unknown_fields, 264 uint8* target) { 265 for (int i = 0; i < unknown_fields.field_count(); i++) { 266 const UnknownField& field = unknown_fields.field(i); 267 268 // The only unknown fields that are allowed to exist in a MessageSet are 269 // messages, which are length-delimited. 270 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) { 271 const string& data = field.length_delimited(); 272 273 // Start group. 274 target = io::CodedOutputStream::WriteTagToArray( 275 WireFormatLite::kMessageSetItemStartTag, target); 276 277 // Write type ID. 278 target = io::CodedOutputStream::WriteTagToArray( 279 WireFormatLite::kMessageSetTypeIdTag, target); 280 target = io::CodedOutputStream::WriteVarint32ToArray( 281 field.number(), target); 282 283 // Write message. 284 target = io::CodedOutputStream::WriteTagToArray( 285 WireFormatLite::kMessageSetMessageTag, target); 286 target = io::CodedOutputStream::WriteVarint32ToArray(data.size(), target); 287 target = io::CodedOutputStream::WriteStringToArray(data, target); 288 289 // End group. 
290 target = io::CodedOutputStream::WriteTagToArray( 291 WireFormatLite::kMessageSetItemEndTag, target); 292 } 293 } 294 295 return target; 296 } 297 298 int WireFormat::ComputeUnknownFieldsSize( 299 const UnknownFieldSet& unknown_fields) { 300 int size = 0; 301 for (int i = 0; i < unknown_fields.field_count(); i++) { 302 const UnknownField& field = unknown_fields.field(i); 303 304 switch (field.type()) { 305 case UnknownField::TYPE_VARINT: 306 size += io::CodedOutputStream::VarintSize32( 307 WireFormatLite::MakeTag(field.number(), 308 WireFormatLite::WIRETYPE_VARINT)); 309 size += io::CodedOutputStream::VarintSize64(field.varint()); 310 break; 311 case UnknownField::TYPE_FIXED32: 312 size += io::CodedOutputStream::VarintSize32( 313 WireFormatLite::MakeTag(field.number(), 314 WireFormatLite::WIRETYPE_FIXED32)); 315 size += sizeof(int32); 316 break; 317 case UnknownField::TYPE_FIXED64: 318 size += io::CodedOutputStream::VarintSize32( 319 WireFormatLite::MakeTag(field.number(), 320 WireFormatLite::WIRETYPE_FIXED64)); 321 size += sizeof(int64); 322 break; 323 case UnknownField::TYPE_LENGTH_DELIMITED: 324 size += io::CodedOutputStream::VarintSize32( 325 WireFormatLite::MakeTag(field.number(), 326 WireFormatLite::WIRETYPE_LENGTH_DELIMITED)); 327 size += io::CodedOutputStream::VarintSize32( 328 field.length_delimited().size()); 329 size += field.length_delimited().size(); 330 break; 331 case UnknownField::TYPE_GROUP: 332 size += io::CodedOutputStream::VarintSize32( 333 WireFormatLite::MakeTag(field.number(), 334 WireFormatLite::WIRETYPE_START_GROUP)); 335 size += ComputeUnknownFieldsSize(field.group()); 336 size += io::CodedOutputStream::VarintSize32( 337 WireFormatLite::MakeTag(field.number(), 338 WireFormatLite::WIRETYPE_END_GROUP)); 339 break; 340 } 341 } 342 343 return size; 344 } 345 346 int WireFormat::ComputeUnknownMessageSetItemsSize( 347 const UnknownFieldSet& unknown_fields) { 348 int size = 0; 349 for (int i = 0; i < unknown_fields.field_count(); i++) { 350 
const UnknownField& field = unknown_fields.field(i); 351 352 // The only unknown fields that are allowed to exist in a MessageSet are 353 // messages, which are length-delimited. 354 if (field.type() == UnknownField::TYPE_LENGTH_DELIMITED) { 355 size += WireFormatLite::kMessageSetItemTagsSize; 356 size += io::CodedOutputStream::VarintSize32(field.number()); 357 size += io::CodedOutputStream::VarintSize32( 358 field.length_delimited().size()); 359 size += field.length_delimited().size(); 360 } 361 } 362 363 return size; 364 } 365 366 // =================================================================== 367 368 bool WireFormat::ParseAndMergePartial(io::CodedInputStream* input, 369 Message* message) { 370 const Descriptor* descriptor = message->GetDescriptor(); 371 const Reflection* message_reflection = message->GetReflection(); 372 373 while(true) { 374 uint32 tag = input->ReadTag(); 375 if (tag == 0) { 376 // End of input. This is a valid place to end, so return true. 377 return true; 378 } 379 380 if (WireFormatLite::GetTagWireType(tag) == 381 WireFormatLite::WIRETYPE_END_GROUP) { 382 // Must be the end of the message. 383 return true; 384 } 385 386 const FieldDescriptor* field = NULL; 387 388 if (descriptor != NULL) { 389 int field_number = WireFormatLite::GetTagFieldNumber(tag); 390 field = descriptor->FindFieldByNumber(field_number); 391 392 // If that failed, check if the field is an extension. 393 if (field == NULL && descriptor->IsExtensionNumber(field_number)) { 394 if (input->GetExtensionPool() == NULL) { 395 field = message_reflection->FindKnownExtensionByNumber(field_number); 396 } else { 397 field = input->GetExtensionPool() 398 ->FindExtensionByNumber(descriptor, field_number); 399 } 400 } 401 402 // If that failed, but we're a MessageSet, and this is the tag for a 403 // MessageSet item, then parse that. 
404 if (field == NULL && 405 descriptor->options().message_set_wire_format() && 406 tag == WireFormatLite::kMessageSetItemStartTag) { 407 if (!ParseAndMergeMessageSetItem(input, message)) { 408 return false; 409 } 410 continue; // Skip ParseAndMergeField(); already taken care of. 411 } 412 } 413 414 if (!ParseAndMergeField(tag, field, message, input)) { 415 return false; 416 } 417 } 418 } 419 420 bool WireFormat::ParseAndMergeField( 421 uint32 tag, 422 const FieldDescriptor* field, // May be NULL for unknown 423 Message* message, 424 io::CodedInputStream* input) { 425 const Reflection* message_reflection = message->GetReflection(); 426 427 enum { UNKNOWN, NORMAL_FORMAT, PACKED_FORMAT } value_format; 428 429 if (field == NULL) { 430 value_format = UNKNOWN; 431 } else if (WireFormatLite::GetTagWireType(tag) == 432 WireTypeForFieldType(field->type())) { 433 value_format = NORMAL_FORMAT; 434 } else if (field->is_packable() && 435 WireFormatLite::GetTagWireType(tag) == 436 WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { 437 value_format = PACKED_FORMAT; 438 } else { 439 // We don't recognize this field. Either the field number is unknown 440 // or the wire type doesn't match. Put it in our unknown field set. 
441 value_format = UNKNOWN; 442 } 443 444 if (value_format == UNKNOWN) { 445 return SkipField(input, tag, 446 message_reflection->MutableUnknownFields(message)); 447 } else if (value_format == PACKED_FORMAT) { 448 uint32 length; 449 if (!input->ReadVarint32(&length)) return false; 450 io::CodedInputStream::Limit limit = input->PushLimit(length); 451 452 switch (field->type()) { 453 #define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \ 454 case FieldDescriptor::TYPE_##TYPE: { \ 455 while (input->BytesUntilLimit() > 0) { \ 456 CPPTYPE value; \ 457 if (!WireFormatLite::ReadPrimitive< \ 458 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)) \ 459 return false; \ 460 message_reflection->Add##CPPTYPE_METHOD(message, field, value); \ 461 } \ 462 break; \ 463 } 464 465 HANDLE_PACKED_TYPE( INT32, int32, Int32) 466 HANDLE_PACKED_TYPE( INT64, int64, Int64) 467 HANDLE_PACKED_TYPE(SINT32, int32, Int32) 468 HANDLE_PACKED_TYPE(SINT64, int64, Int64) 469 HANDLE_PACKED_TYPE(UINT32, uint32, UInt32) 470 HANDLE_PACKED_TYPE(UINT64, uint64, UInt64) 471 472 HANDLE_PACKED_TYPE( FIXED32, uint32, UInt32) 473 HANDLE_PACKED_TYPE( FIXED64, uint64, UInt64) 474 HANDLE_PACKED_TYPE(SFIXED32, int32, Int32) 475 HANDLE_PACKED_TYPE(SFIXED64, int64, Int64) 476 477 HANDLE_PACKED_TYPE(FLOAT , float , Float ) 478 HANDLE_PACKED_TYPE(DOUBLE, double, Double) 479 480 HANDLE_PACKED_TYPE(BOOL, bool, Bool) 481 #undef HANDLE_PACKED_TYPE 482 483 case FieldDescriptor::TYPE_ENUM: { 484 while (input->BytesUntilLimit() > 0) { 485 int value; 486 if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>( 487 input, &value)) return false; 488 const EnumValueDescriptor* enum_value = 489 field->enum_type()->FindValueByNumber(value); 490 if (enum_value != NULL) { 491 message_reflection->AddEnum(message, field, enum_value); 492 } 493 } 494 495 break; 496 } 497 498 case FieldDescriptor::TYPE_STRING: 499 case FieldDescriptor::TYPE_GROUP: 500 case FieldDescriptor::TYPE_MESSAGE: 501 case 
FieldDescriptor::TYPE_BYTES: 502 // Can't have packed fields of these types: these should be caught by 503 // the protocol compiler. 504 return false; 505 break; 506 } 507 508 input->PopLimit(limit); 509 } else { 510 // Non-packed value (value_format == NORMAL_FORMAT) 511 switch (field->type()) { 512 #define HANDLE_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \ 513 case FieldDescriptor::TYPE_##TYPE: { \ 514 CPPTYPE value; \ 515 if (!WireFormatLite::ReadPrimitive< \ 516 CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)) \ 517 return false; \ 518 if (field->is_repeated()) { \ 519 message_reflection->Add##CPPTYPE_METHOD(message, field, value); \ 520 } else { \ 521 message_reflection->Set##CPPTYPE_METHOD(message, field, value); \ 522 } \ 523 break; \ 524 } 525 526 HANDLE_TYPE( INT32, int32, Int32) 527 HANDLE_TYPE( INT64, int64, Int64) 528 HANDLE_TYPE(SINT32, int32, Int32) 529 HANDLE_TYPE(SINT64, int64, Int64) 530 HANDLE_TYPE(UINT32, uint32, UInt32) 531 HANDLE_TYPE(UINT64, uint64, UInt64) 532 533 HANDLE_TYPE( FIXED32, uint32, UInt32) 534 HANDLE_TYPE( FIXED64, uint64, UInt64) 535 HANDLE_TYPE(SFIXED32, int32, Int32) 536 HANDLE_TYPE(SFIXED64, int64, Int64) 537 538 HANDLE_TYPE(FLOAT , float , Float ) 539 HANDLE_TYPE(DOUBLE, double, Double) 540 541 HANDLE_TYPE(BOOL, bool, Bool) 542 #undef HANDLE_TYPE 543 544 case FieldDescriptor::TYPE_ENUM: { 545 int value; 546 if (!WireFormatLite::ReadPrimitive<int, WireFormatLite::TYPE_ENUM>( 547 input, &value)) return false; 548 const EnumValueDescriptor* enum_value = 549 field->enum_type()->FindValueByNumber(value); 550 if (enum_value != NULL) { 551 if (field->is_repeated()) { 552 message_reflection->AddEnum(message, field, enum_value); 553 } else { 554 message_reflection->SetEnum(message, field, enum_value); 555 } 556 } else { 557 // The enum value is not one of the known values. Add it to the 558 // UnknownFieldSet. 
559 int64 sign_extended_value = static_cast<int64>(value); 560 message_reflection->MutableUnknownFields(message) 561 ->AddVarint(WireFormatLite::GetTagFieldNumber(tag), 562 sign_extended_value); 563 } 564 break; 565 } 566 567 // Handle strings separately so that we can optimize the ctype=CORD case. 568 case FieldDescriptor::TYPE_STRING: { 569 string value; 570 if (!WireFormatLite::ReadString(input, &value)) return false; 571 VerifyUTF8String(value.data(), value.length(), PARSE); 572 if (field->is_repeated()) { 573 message_reflection->AddString(message, field, value); 574 } else { 575 message_reflection->SetString(message, field, value); 576 } 577 break; 578 } 579 580 case FieldDescriptor::TYPE_BYTES: { 581 string value; 582 if (!WireFormatLite::ReadBytes(input, &value)) return false; 583 if (field->is_repeated()) { 584 message_reflection->AddString(message, field, value); 585 } else { 586 message_reflection->SetString(message, field, value); 587 } 588 break; 589 } 590 591 case FieldDescriptor::TYPE_GROUP: { 592 Message* sub_message; 593 if (field->is_repeated()) { 594 sub_message = message_reflection->AddMessage( 595 message, field, input->GetExtensionFactory()); 596 } else { 597 sub_message = message_reflection->MutableMessage( 598 message, field, input->GetExtensionFactory()); 599 } 600 601 if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag), 602 input, sub_message)) 603 return false; 604 break; 605 } 606 607 case FieldDescriptor::TYPE_MESSAGE: { 608 Message* sub_message; 609 if (field->is_repeated()) { 610 sub_message = message_reflection->AddMessage( 611 message, field, input->GetExtensionFactory()); 612 } else { 613 sub_message = message_reflection->MutableMessage( 614 message, field, input->GetExtensionFactory()); 615 } 616 617 if (!WireFormatLite::ReadMessage(input, sub_message)) return false; 618 break; 619 } 620 } 621 } 622 623 return true; 624 } 625 626 bool WireFormat::ParseAndMergeMessageSetItem( 627 io::CodedInputStream* input, 628 
Message* message) { 629 const Reflection* message_reflection = message->GetReflection(); 630 631 // This method parses a group which should contain two fields: 632 // required int32 type_id = 2; 633 // required data message = 3; 634 635 // Once we see a type_id, we'll construct a fake tag for this extension 636 // which is the tag it would have had under the proto2 extensions wire 637 // format. 638 uint32 fake_tag = 0; 639 640 // Once we see a type_id, we'll look up the FieldDescriptor for the 641 // extension. 642 const FieldDescriptor* field = NULL; 643 644 // If we see message data before the type_id, we'll append it to this so 645 // we can parse it later. This will probably never happen in practice, 646 // as no MessageSet encoder I know of writes the message before the type ID. 647 // But, it's technically valid so we should allow it. 648 // TODO(kenton): Use a Cord instead? Do I care? 649 string message_data; 650 651 while (true) { 652 uint32 tag = input->ReadTag(); 653 if (tag == 0) return false; 654 655 switch (tag) { 656 case WireFormatLite::kMessageSetTypeIdTag: { 657 uint32 type_id; 658 if (!input->ReadVarint32(&type_id)) return false; 659 fake_tag = WireFormatLite::MakeTag( 660 type_id, WireFormatLite::WIRETYPE_LENGTH_DELIMITED); 661 field = message_reflection->FindKnownExtensionByNumber(type_id); 662 663 if (!message_data.empty()) { 664 // We saw some message data before the type_id. Have to parse it 665 // now. 666 io::ArrayInputStream raw_input(message_data.data(), 667 message_data.size()); 668 io::CodedInputStream sub_input(&raw_input); 669 if (!ParseAndMergeField(fake_tag, field, message, 670 &sub_input)) { 671 return false; 672 } 673 message_data.clear(); 674 } 675 676 break; 677 } 678 679 case WireFormatLite::kMessageSetMessageTag: { 680 if (fake_tag == 0) { 681 // We haven't seen a type_id yet. Append this data to message_data. 
682 string temp; 683 uint32 length; 684 if (!input->ReadVarint32(&length)) return false; 685 if (!input->ReadString(&temp, length)) return false; 686 message_data.append(temp); 687 } else { 688 // Already saw type_id, so we can parse this directly. 689 if (!ParseAndMergeField(fake_tag, field, message, input)) { 690 return false; 691 } 692 } 693 694 break; 695 } 696 697 case WireFormatLite::kMessageSetItemEndTag: { 698 return true; 699 } 700 701 default: { 702 if (!SkipField(input, tag, NULL)) return false; 703 } 704 } 705 } 706 } 707 708 // =================================================================== 709 710 void WireFormat::SerializeWithCachedSizes( 711 const Message& message, 712 int size, io::CodedOutputStream* output) { 713 const Descriptor* descriptor = message.GetDescriptor(); 714 const Reflection* message_reflection = message.GetReflection(); 715 int expected_endpoint = output->ByteCount() + size; 716 717 vector<const FieldDescriptor*> fields; 718 message_reflection->ListFields(message, &fields); 719 for (int i = 0; i < fields.size(); i++) { 720 SerializeFieldWithCachedSizes(fields[i], message, output); 721 } 722 723 if (descriptor->options().message_set_wire_format()) { 724 SerializeUnknownMessageSetItems( 725 message_reflection->GetUnknownFields(message), output); 726 } else { 727 SerializeUnknownFields( 728 message_reflection->GetUnknownFields(message), output); 729 } 730 731 GOOGLE_CHECK_EQ(output->ByteCount(), expected_endpoint) 732 << ": Protocol message serialized to a size different from what was " 733 "originally expected. 
Perhaps it was modified by another thread " 734 "during serialization?"; 735 } 736 737 void WireFormat::SerializeFieldWithCachedSizes( 738 const FieldDescriptor* field, 739 const Message& message, 740 io::CodedOutputStream* output) { 741 const Reflection* message_reflection = message.GetReflection(); 742 743 if (field->is_extension() && 744 field->containing_type()->options().message_set_wire_format() && 745 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 746 !field->is_repeated()) { 747 SerializeMessageSetItemWithCachedSizes(field, message, output); 748 return; 749 } 750 751 int count = 0; 752 753 if (field->is_repeated()) { 754 count = message_reflection->FieldSize(message, field); 755 } else if (message_reflection->HasField(message, field)) { 756 count = 1; 757 } 758 759 const bool is_packed = field->options().packed(); 760 if (is_packed && count > 0) { 761 WireFormatLite::WriteTag(field->number(), 762 WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); 763 const int data_size = FieldDataOnlyByteSize(field, message); 764 output->WriteVarint32(data_size); 765 } 766 767 for (int j = 0; j < count; j++) { 768 switch (field->type()) { 769 #define HANDLE_PRIMITIVE_TYPE(TYPE, CPPTYPE, TYPE_METHOD, CPPTYPE_METHOD) \ 770 case FieldDescriptor::TYPE_##TYPE: { \ 771 const CPPTYPE value = field->is_repeated() ? 
\ 772 message_reflection->GetRepeated##CPPTYPE_METHOD( \ 773 message, field, j) : \ 774 message_reflection->Get##CPPTYPE_METHOD( \ 775 message, field); \ 776 if (is_packed) { \ 777 WireFormatLite::Write##TYPE_METHOD##NoTag(value, output); \ 778 } else { \ 779 WireFormatLite::Write##TYPE_METHOD(field->number(), value, output); \ 780 } \ 781 break; \ 782 } 783 784 HANDLE_PRIMITIVE_TYPE( INT32, int32, Int32, Int32) 785 HANDLE_PRIMITIVE_TYPE( INT64, int64, Int64, Int64) 786 HANDLE_PRIMITIVE_TYPE(SINT32, int32, SInt32, Int32) 787 HANDLE_PRIMITIVE_TYPE(SINT64, int64, SInt64, Int64) 788 HANDLE_PRIMITIVE_TYPE(UINT32, uint32, UInt32, UInt32) 789 HANDLE_PRIMITIVE_TYPE(UINT64, uint64, UInt64, UInt64) 790 791 HANDLE_PRIMITIVE_TYPE( FIXED32, uint32, Fixed32, UInt32) 792 HANDLE_PRIMITIVE_TYPE( FIXED64, uint64, Fixed64, UInt64) 793 HANDLE_PRIMITIVE_TYPE(SFIXED32, int32, SFixed32, Int32) 794 HANDLE_PRIMITIVE_TYPE(SFIXED64, int64, SFixed64, Int64) 795 796 HANDLE_PRIMITIVE_TYPE(FLOAT , float , Float , Float ) 797 HANDLE_PRIMITIVE_TYPE(DOUBLE, double, Double, Double) 798 799 HANDLE_PRIMITIVE_TYPE(BOOL, bool, Bool, Bool) 800 #undef HANDLE_PRIMITIVE_TYPE 801 802 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD) \ 803 case FieldDescriptor::TYPE_##TYPE: \ 804 WireFormatLite::Write##TYPE_METHOD( \ 805 field->number(), \ 806 field->is_repeated() ? \ 807 message_reflection->GetRepeated##CPPTYPE_METHOD( \ 808 message, field, j) : \ 809 message_reflection->Get##CPPTYPE_METHOD(message, field), \ 810 output); \ 811 break; 812 813 HANDLE_TYPE(GROUP , Group , Message) 814 HANDLE_TYPE(MESSAGE, Message, Message) 815 #undef HANDLE_TYPE 816 817 case FieldDescriptor::TYPE_ENUM: { 818 const EnumValueDescriptor* value = field->is_repeated() ? 
819 message_reflection->GetRepeatedEnum(message, field, j) : 820 message_reflection->GetEnum(message, field); 821 if (is_packed) { 822 WireFormatLite::WriteEnumNoTag(value->number(), output); 823 } else { 824 WireFormatLite::WriteEnum(field->number(), value->number(), output); 825 } 826 break; 827 } 828 829 // Handle strings separately so that we can get string references 830 // instead of copying. 831 case FieldDescriptor::TYPE_STRING: { 832 string scratch; 833 const string& value = field->is_repeated() ? 834 message_reflection->GetRepeatedStringReference( 835 message, field, j, &scratch) : 836 message_reflection->GetStringReference(message, field, &scratch); 837 VerifyUTF8String(value.data(), value.length(), SERIALIZE); 838 WireFormatLite::WriteString(field->number(), value, output); 839 break; 840 } 841 842 case FieldDescriptor::TYPE_BYTES: { 843 string scratch; 844 const string& value = field->is_repeated() ? 845 message_reflection->GetRepeatedStringReference( 846 message, field, j, &scratch) : 847 message_reflection->GetStringReference(message, field, &scratch); 848 WireFormatLite::WriteBytes(field->number(), value, output); 849 break; 850 } 851 } 852 } 853 } 854 855 void WireFormat::SerializeMessageSetItemWithCachedSizes( 856 const FieldDescriptor* field, 857 const Message& message, 858 io::CodedOutputStream* output) { 859 const Reflection* message_reflection = message.GetReflection(); 860 861 // Start group. 862 output->WriteVarint32(WireFormatLite::kMessageSetItemStartTag); 863 864 // Write type ID. 865 output->WriteVarint32(WireFormatLite::kMessageSetTypeIdTag); 866 output->WriteVarint32(field->number()); 867 868 // Write message. 869 output->WriteVarint32(WireFormatLite::kMessageSetMessageTag); 870 871 const Message& sub_message = message_reflection->GetMessage(message, field); 872 output->WriteVarint32(sub_message.GetCachedSize()); 873 sub_message.SerializeWithCachedSizes(output); 874 875 // End group. 
876 output->WriteVarint32(WireFormatLite::kMessageSetItemEndTag); 877 } 878 879 // =================================================================== 880 881 int WireFormat::ByteSize(const Message& message) { 882 const Descriptor* descriptor = message.GetDescriptor(); 883 const Reflection* message_reflection = message.GetReflection(); 884 885 int our_size = 0; 886 887 vector<const FieldDescriptor*> fields; 888 message_reflection->ListFields(message, &fields); 889 for (int i = 0; i < fields.size(); i++) { 890 our_size += FieldByteSize(fields[i], message); 891 } 892 893 if (descriptor->options().message_set_wire_format()) { 894 our_size += ComputeUnknownMessageSetItemsSize( 895 message_reflection->GetUnknownFields(message)); 896 } else { 897 our_size += ComputeUnknownFieldsSize( 898 message_reflection->GetUnknownFields(message)); 899 } 900 901 return our_size; 902 } 903 904 int WireFormat::FieldByteSize( 905 const FieldDescriptor* field, 906 const Message& message) { 907 const Reflection* message_reflection = message.GetReflection(); 908 909 if (field->is_extension() && 910 field->containing_type()->options().message_set_wire_format() && 911 field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 912 !field->is_repeated()) { 913 return MessageSetItemByteSize(field, message); 914 } 915 916 int count = 0; 917 if (field->is_repeated()) { 918 count = message_reflection->FieldSize(message, field); 919 } else if (message_reflection->HasField(message, field)) { 920 count = 1; 921 } 922 923 const int data_size = FieldDataOnlyByteSize(field, message); 924 int our_size = data_size; 925 if (field->options().packed()) { 926 if (data_size > 0) { 927 // Packed fields get serialized like a string, not their native type. 
928 // Technically this doesn't really matter; the size only changes if it's 929 // a GROUP 930 our_size += TagSize(field->number(), FieldDescriptor::TYPE_STRING); 931 our_size += io::CodedOutputStream::VarintSize32(data_size); 932 } 933 } else { 934 our_size += count * TagSize(field->number(), field->type()); 935 } 936 return our_size; 937 } 938 939 int WireFormat::FieldDataOnlyByteSize( 940 const FieldDescriptor* field, 941 const Message& message) { 942 const Reflection* message_reflection = message.GetReflection(); 943 944 int count = 0; 945 if (field->is_repeated()) { 946 count = message_reflection->FieldSize(message, field); 947 } else if (message_reflection->HasField(message, field)) { 948 count = 1; 949 } 950 951 int data_size = 0; 952 switch (field->type()) { 953 #define HANDLE_TYPE(TYPE, TYPE_METHOD, CPPTYPE_METHOD) \ 954 case FieldDescriptor::TYPE_##TYPE: \ 955 if (field->is_repeated()) { \ 956 for (int j = 0; j < count; j++) { \ 957 data_size += WireFormatLite::TYPE_METHOD##Size( \ 958 message_reflection->GetRepeated##CPPTYPE_METHOD( \ 959 message, field, j)); \ 960 } \ 961 } else { \ 962 data_size += WireFormatLite::TYPE_METHOD##Size( \ 963 message_reflection->Get##CPPTYPE_METHOD(message, field)); \ 964 } \ 965 break; 966 967 #define HANDLE_FIXED_TYPE(TYPE, TYPE_METHOD) \ 968 case FieldDescriptor::TYPE_##TYPE: \ 969 data_size += count * WireFormatLite::k##TYPE_METHOD##Size; \ 970 break; 971 972 HANDLE_TYPE( INT32, Int32, Int32) 973 HANDLE_TYPE( INT64, Int64, Int64) 974 HANDLE_TYPE(SINT32, SInt32, Int32) 975 HANDLE_TYPE(SINT64, SInt64, Int64) 976 HANDLE_TYPE(UINT32, UInt32, UInt32) 977 HANDLE_TYPE(UINT64, UInt64, UInt64) 978 979 HANDLE_FIXED_TYPE( FIXED32, Fixed32) 980 HANDLE_FIXED_TYPE( FIXED64, Fixed64) 981 HANDLE_FIXED_TYPE(SFIXED32, SFixed32) 982 HANDLE_FIXED_TYPE(SFIXED64, SFixed64) 983 984 HANDLE_FIXED_TYPE(FLOAT , Float ) 985 HANDLE_FIXED_TYPE(DOUBLE, Double) 986 987 HANDLE_FIXED_TYPE(BOOL, Bool) 988 989 HANDLE_TYPE(GROUP , Group , Message) 990 
HANDLE_TYPE(MESSAGE, Message, Message) 991 #undef HANDLE_TYPE 992 #undef HANDLE_FIXED_TYPE 993 994 case FieldDescriptor::TYPE_ENUM: { 995 if (field->is_repeated()) { 996 for (int j = 0; j < count; j++) { 997 data_size += WireFormatLite::EnumSize( 998 message_reflection->GetRepeatedEnum(message, field, j)->number()); 999 } 1000 } else { 1001 data_size += WireFormatLite::EnumSize( 1002 message_reflection->GetEnum(message, field)->number()); 1003 } 1004 break; 1005 } 1006 1007 // Handle strings separately so that we can get string references 1008 // instead of copying. 1009 case FieldDescriptor::TYPE_STRING: 1010 case FieldDescriptor::TYPE_BYTES: { 1011 for (int j = 0; j < count; j++) { 1012 string scratch; 1013 const string& value = field->is_repeated() ? 1014 message_reflection->GetRepeatedStringReference( 1015 message, field, j, &scratch) : 1016 message_reflection->GetStringReference(message, field, &scratch); 1017 data_size += WireFormatLite::StringSize(value); 1018 } 1019 break; 1020 } 1021 } 1022 return data_size; 1023 } 1024 1025 int WireFormat::MessageSetItemByteSize( 1026 const FieldDescriptor* field, 1027 const Message& message) { 1028 const Reflection* message_reflection = message.GetReflection(); 1029 1030 int our_size = WireFormatLite::kMessageSetItemTagsSize; 1031 1032 // type_id 1033 our_size += io::CodedOutputStream::VarintSize32(field->number()); 1034 1035 // message 1036 const Message& sub_message = message_reflection->GetMessage(message, field); 1037 int message_size = sub_message.ByteSize(); 1038 1039 our_size += io::CodedOutputStream::VarintSize32(message_size); 1040 our_size += message_size; 1041 1042 return our_size; 1043 } 1044 1045 void WireFormat::VerifyUTF8StringFallback(const char* data, 1046 int size, 1047 Operation op) { 1048 if (!IsStructurallyValidUTF8(data, size)) { 1049 const char* operation_str = NULL; 1050 switch (op) { 1051 case PARSE: 1052 operation_str = "parsing"; 1053 break; 1054 case SERIALIZE: 1055 operation_str = 
"serializing"; 1056 break; 1057 // no default case: have the compiler warn if a case is not covered. 1058 } 1059 GOOGLE_LOG(ERROR) << "Encountered string containing invalid UTF-8 data while " 1060 << operation_str 1061 << " protocol buffer. Strings must contain only UTF-8; " 1062 "use the 'bytes' type for raw bytes."; 1063 } 1064 } 1065 1066 1067 } // namespace internal 1068 } // namespace protobuf 1069 } // namespace google 1070