// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton (at) google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// DynamicMessage is implemented by constructing a data structure which
// has roughly the same memory layout as a generated message would have.
// Then, we use GeneratedMessageReflection to implement our reflection
// interface. All the other operations we need to implement (e.g.
// parsing, copying, etc.) are already implemented in terms of
// Reflection, so the rest is easy.
//
// The up side of this strategy is that it's very efficient. We don't
// need to use hash_maps or generic representations of fields. The
// down side is that this is a low-level memory management hack which
// can be tricky to get right.
//
// As mentioned in the header, we only expose a DynamicMessageFactory
// publicly, not the DynamicMessage class itself. This is because
// GeneratedMessageReflection wants to have a pointer to a "default"
// copy of the class, with all fields initialized to their default
// values. We only want to construct one of these per message type,
// so DynamicMessageFactory stores a cache of default messages for
// each type it sees (each unique Descriptor pointer). The code
// refers to the "default" copy of the class as the "prototype".
//
// Note on memory allocation: This module often calls "operator new()"
// to allocate untyped memory, rather than calling something like
// "new uint8[]". This is because "operator new()" means "Give me some
// space which I can use as I please." while "new uint8[]" means "Give
// me an array of 8-bit integers.". In practice, the latter may return
// a pointer that is not aligned correctly for general use. I believe
// Item 8 of "More Effective C++" discusses this in more detail, though
// I don't have the book on me right now so I'm not sure.
64 65 #include <algorithm> 66 #include <google/protobuf/stubs/hash.h> 67 68 #include <google/protobuf/stubs/common.h> 69 70 #include <google/protobuf/dynamic_message.h> 71 #include <google/protobuf/descriptor.h> 72 #include <google/protobuf/descriptor.pb.h> 73 #include <google/protobuf/generated_message_util.h> 74 #include <google/protobuf/generated_message_reflection.h> 75 #include <google/protobuf/reflection_ops.h> 76 #include <google/protobuf/repeated_field.h> 77 #include <google/protobuf/extension_set.h> 78 #include <google/protobuf/wire_format.h> 79 80 namespace google { 81 namespace protobuf { 82 83 using internal::WireFormat; 84 using internal::ExtensionSet; 85 using internal::GeneratedMessageReflection; 86 87 88 // =================================================================== 89 // Some helper tables and functions... 90 91 namespace { 92 93 // Compute the byte size of the in-memory representation of the field. 94 int FieldSpaceUsed(const FieldDescriptor* field) { 95 typedef FieldDescriptor FD; // avoid line wrapping 96 if (field->label() == FD::LABEL_REPEATED) { 97 switch (field->cpp_type()) { 98 case FD::CPPTYPE_INT32 : return sizeof(RepeatedField<int32 >); 99 case FD::CPPTYPE_INT64 : return sizeof(RepeatedField<int64 >); 100 case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32 >); 101 case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64 >); 102 case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double >); 103 case FD::CPPTYPE_FLOAT : return sizeof(RepeatedField<float >); 104 case FD::CPPTYPE_BOOL : return sizeof(RepeatedField<bool >); 105 case FD::CPPTYPE_ENUM : return sizeof(RepeatedField<int >); 106 case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>); 107 108 case FD::CPPTYPE_STRING: 109 switch (field->options().ctype()) { 110 default: // TODO(kenton): Support other string reps. 
111 case FieldOptions::STRING: 112 return sizeof(RepeatedPtrField<string>); 113 } 114 break; 115 } 116 } else { 117 switch (field->cpp_type()) { 118 case FD::CPPTYPE_INT32 : return sizeof(int32 ); 119 case FD::CPPTYPE_INT64 : return sizeof(int64 ); 120 case FD::CPPTYPE_UINT32 : return sizeof(uint32 ); 121 case FD::CPPTYPE_UINT64 : return sizeof(uint64 ); 122 case FD::CPPTYPE_DOUBLE : return sizeof(double ); 123 case FD::CPPTYPE_FLOAT : return sizeof(float ); 124 case FD::CPPTYPE_BOOL : return sizeof(bool ); 125 case FD::CPPTYPE_ENUM : return sizeof(int ); 126 case FD::CPPTYPE_MESSAGE: return sizeof(Message*); 127 128 case FD::CPPTYPE_STRING: 129 switch (field->options().ctype()) { 130 default: // TODO(kenton): Support other string reps. 131 case FieldOptions::STRING: 132 return sizeof(string*); 133 } 134 break; 135 } 136 } 137 138 GOOGLE_LOG(DFATAL) << "Can't get here."; 139 return 0; 140 } 141 142 inline int DivideRoundingUp(int i, int j) { 143 return (i + (j - 1)) / j; 144 } 145 146 static const int kSafeAlignment = sizeof(uint64); 147 148 inline int AlignTo(int offset, int alignment) { 149 return DivideRoundingUp(offset, alignment) * alignment; 150 } 151 152 // Rounds the given byte offset up to the next offset aligned such that any 153 // type may be stored at it. 154 inline int AlignOffset(int offset) { 155 return AlignTo(offset, kSafeAlignment); 156 } 157 158 #define bitsizeof(T) (sizeof(T) * 8) 159 160 } // namespace 161 162 // =================================================================== 163 164 class DynamicMessage : public Message { 165 public: 166 struct TypeInfo { 167 int size; 168 int has_bits_offset; 169 int unknown_fields_offset; 170 int extensions_offset; 171 172 // Not owned by the TypeInfo. 173 DynamicMessageFactory* factory; // The factory that created this object. 174 const DescriptorPool* pool; // The factory's DescriptorPool. 175 const Descriptor* type; // Type of this DynamicMessage. 
176 177 // Warning: The order in which the following pointers are defined is 178 // important (the prototype must be deleted *before* the offsets). 179 scoped_array<int> offsets; 180 scoped_ptr<const GeneratedMessageReflection> reflection; 181 scoped_ptr<const DynamicMessage> prototype; 182 }; 183 184 DynamicMessage(const TypeInfo* type_info); 185 ~DynamicMessage(); 186 187 // Called on the prototype after construction to initialize message fields. 188 void CrossLinkPrototypes(); 189 190 // implements Message ---------------------------------------------- 191 192 Message* New() const; 193 194 int GetCachedSize() const; 195 void SetCachedSize(int size) const; 196 197 Metadata GetMetadata() const; 198 199 private: 200 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage); 201 202 inline bool is_prototype() const { 203 return type_info_->prototype == this || 204 // If type_info_->prototype is NULL, then we must be constructing 205 // the prototype now, which means we must be the prototype. 206 type_info_->prototype == NULL; 207 } 208 209 inline void* OffsetToPointer(int offset) { 210 return reinterpret_cast<uint8*>(this) + offset; 211 } 212 inline const void* OffsetToPointer(int offset) const { 213 return reinterpret_cast<const uint8*>(this) + offset; 214 } 215 216 const TypeInfo* type_info_; 217 218 // TODO(kenton): Make this an atomic<int> when C++ supports it. 219 mutable int cached_byte_size_; 220 }; 221 222 DynamicMessage::DynamicMessage(const TypeInfo* type_info) 223 : type_info_(type_info), 224 cached_byte_size_(0) { 225 // We need to call constructors for various fields manually and set 226 // default values where appropriate. We use placement new to call 227 // constructors. If you haven't heard of placement new, I suggest Googling 228 // it now. We use placement new even for primitive types that don't have 229 // constructors for consistency. 
(In theory, placement new should be used 230 // any time you are trying to convert untyped memory to typed memory, though 231 // in practice that's not strictly necessary for types that don't have a 232 // constructor.) 233 234 const Descriptor* descriptor = type_info_->type; 235 236 new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet; 237 238 if (type_info_->extensions_offset != -1) { 239 new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet; 240 } 241 242 for (int i = 0; i < descriptor->field_count(); i++) { 243 const FieldDescriptor* field = descriptor->field(i); 244 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 245 switch (field->cpp_type()) { 246 #define HANDLE_TYPE(CPPTYPE, TYPE) \ 247 case FieldDescriptor::CPPTYPE_##CPPTYPE: \ 248 if (!field->is_repeated()) { \ 249 new(field_ptr) TYPE(field->default_value_##TYPE()); \ 250 } else { \ 251 new(field_ptr) RepeatedField<TYPE>(); \ 252 } \ 253 break; 254 255 HANDLE_TYPE(INT32 , int32 ); 256 HANDLE_TYPE(INT64 , int64 ); 257 HANDLE_TYPE(UINT32, uint32); 258 HANDLE_TYPE(UINT64, uint64); 259 HANDLE_TYPE(DOUBLE, double); 260 HANDLE_TYPE(FLOAT , float ); 261 HANDLE_TYPE(BOOL , bool ); 262 #undef HANDLE_TYPE 263 264 case FieldDescriptor::CPPTYPE_ENUM: 265 if (!field->is_repeated()) { 266 new(field_ptr) int(field->default_value_enum()->number()); 267 } else { 268 new(field_ptr) RepeatedField<int>(); 269 } 270 break; 271 272 case FieldDescriptor::CPPTYPE_STRING: 273 switch (field->options().ctype()) { 274 default: // TODO(kenton): Support other string reps. 
275 case FieldOptions::STRING: 276 if (!field->is_repeated()) { 277 if (is_prototype()) { 278 new(field_ptr) const string*(&field->default_value_string()); 279 } else { 280 string* default_value = 281 *reinterpret_cast<string* const*>( 282 type_info_->prototype->OffsetToPointer( 283 type_info_->offsets[i])); 284 new(field_ptr) string*(default_value); 285 } 286 } else { 287 new(field_ptr) RepeatedPtrField<string>(); 288 } 289 break; 290 } 291 break; 292 293 case FieldDescriptor::CPPTYPE_MESSAGE: { 294 if (!field->is_repeated()) { 295 new(field_ptr) Message*(NULL); 296 } else { 297 new(field_ptr) RepeatedPtrField<Message>(); 298 } 299 break; 300 } 301 } 302 } 303 } 304 305 DynamicMessage::~DynamicMessage() { 306 const Descriptor* descriptor = type_info_->type; 307 308 reinterpret_cast<UnknownFieldSet*>( 309 OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet(); 310 311 if (type_info_->extensions_offset != -1) { 312 reinterpret_cast<ExtensionSet*>( 313 OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet(); 314 } 315 316 // We need to manually run the destructors for repeated fields and strings, 317 // just as we ran their constructors in the the DynamicMessage constructor. 318 // Additionally, if any singular embedded messages have been allocated, we 319 // need to delete them, UNLESS we are the prototype message of this type, 320 // in which case any embedded messages are other prototypes and shouldn't 321 // be touched. 
322 for (int i = 0; i < descriptor->field_count(); i++) { 323 const FieldDescriptor* field = descriptor->field(i); 324 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 325 326 if (field->is_repeated()) { 327 switch (field->cpp_type()) { 328 #define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ 329 case FieldDescriptor::CPPTYPE_##UPPERCASE : \ 330 reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr) \ 331 ->~RepeatedField<LOWERCASE>(); \ 332 break 333 334 HANDLE_TYPE( INT32, int32); 335 HANDLE_TYPE( INT64, int64); 336 HANDLE_TYPE(UINT32, uint32); 337 HANDLE_TYPE(UINT64, uint64); 338 HANDLE_TYPE(DOUBLE, double); 339 HANDLE_TYPE( FLOAT, float); 340 HANDLE_TYPE( BOOL, bool); 341 HANDLE_TYPE( ENUM, int); 342 #undef HANDLE_TYPE 343 344 case FieldDescriptor::CPPTYPE_STRING: 345 switch (field->options().ctype()) { 346 default: // TODO(kenton): Support other string reps. 347 case FieldOptions::STRING: 348 reinterpret_cast<RepeatedPtrField<string>*>(field_ptr) 349 ->~RepeatedPtrField<string>(); 350 break; 351 } 352 break; 353 354 case FieldDescriptor::CPPTYPE_MESSAGE: 355 reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr) 356 ->~RepeatedPtrField<Message>(); 357 break; 358 } 359 360 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) { 361 switch (field->options().ctype()) { 362 default: // TODO(kenton): Support other string reps. 363 case FieldOptions::STRING: { 364 string* ptr = *reinterpret_cast<string**>(field_ptr); 365 if (ptr != &field->default_value_string()) { 366 delete ptr; 367 } 368 break; 369 } 370 } 371 } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) && 372 !is_prototype()) { 373 Message* message = *reinterpret_cast<Message**>(field_ptr); 374 if (message != NULL) { 375 delete message; 376 } 377 } 378 } 379 } 380 381 void DynamicMessage::CrossLinkPrototypes() { 382 // This should only be called on the prototype message. 
383 GOOGLE_CHECK(is_prototype()); 384 385 DynamicMessageFactory* factory = type_info_->factory; 386 const Descriptor* descriptor = type_info_->type; 387 388 // Cross-link default messages. 389 for (int i = 0; i < descriptor->field_count(); i++) { 390 const FieldDescriptor* field = descriptor->field(i); 391 void* field_ptr = OffsetToPointer(type_info_->offsets[i]); 392 393 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && 394 !field->is_repeated()) { 395 // For fields with message types, we need to cross-link with the 396 // prototype for the field's type. 397 // For singular fields, the field is just a pointer which should 398 // point to the prototype. 399 *reinterpret_cast<const Message**>(field_ptr) = 400 factory->GetPrototypeNoLock(field->message_type()); 401 } 402 } 403 } 404 405 Message* DynamicMessage::New() const { 406 void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size)); 407 memset(new_base, 0, type_info_->size); 408 return new(new_base) DynamicMessage(type_info_); 409 } 410 411 int DynamicMessage::GetCachedSize() const { 412 return cached_byte_size_; 413 } 414 415 void DynamicMessage::SetCachedSize(int size) const { 416 // This is theoretically not thread-compatible, but in practice it works 417 // because if multiple threads write this simultaneously, they will be 418 // writing the exact same value. 
419 cached_byte_size_ = size; 420 } 421 422 Metadata DynamicMessage::GetMetadata() const { 423 Metadata metadata; 424 metadata.descriptor = type_info_->type; 425 metadata.reflection = type_info_->reflection.get(); 426 return metadata; 427 } 428 429 // =================================================================== 430 431 struct DynamicMessageFactory::PrototypeMap { 432 typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map; 433 Map map_; 434 }; 435 436 DynamicMessageFactory::DynamicMessageFactory() 437 : pool_(NULL), delegate_to_generated_factory_(false), 438 prototypes_(new PrototypeMap) { 439 } 440 441 DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool) 442 : pool_(pool), delegate_to_generated_factory_(false), 443 prototypes_(new PrototypeMap) { 444 } 445 446 DynamicMessageFactory::~DynamicMessageFactory() { 447 for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin(); 448 iter != prototypes_->map_.end(); ++iter) { 449 delete iter->second; 450 } 451 } 452 453 const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) { 454 MutexLock lock(&prototypes_mutex_); 455 return GetPrototypeNoLock(type); 456 } 457 458 const Message* DynamicMessageFactory::GetPrototypeNoLock( 459 const Descriptor* type) { 460 if (delegate_to_generated_factory_ && 461 type->file()->pool() == DescriptorPool::generated_pool()) { 462 return MessageFactory::generated_factory()->GetPrototype(type); 463 } 464 465 const DynamicMessage::TypeInfo** target = &prototypes_->map_[type]; 466 if (*target != NULL) { 467 // Already exists. 468 return (*target)->prototype.get(); 469 } 470 471 DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo; 472 *target = type_info; 473 474 type_info->type = type; 475 type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_; 476 type_info->factory = this; 477 478 // We need to construct all the structures passed to 479 // GeneratedMessageReflection's constructor. 
This includes: 480 // - A block of memory that contains space for all the message's fields. 481 // - An array of integers indicating the byte offset of each field within 482 // this block. 483 // - A big bitfield containing a bit for each field indicating whether 484 // or not that field is set. 485 486 // Compute size and offsets. 487 int* offsets = new int[type->field_count()]; 488 type_info->offsets.reset(offsets); 489 490 // Decide all field offsets by packing in order. 491 // We place the DynamicMessage object itself at the beginning of the allocated 492 // space. 493 int size = sizeof(DynamicMessage); 494 size = AlignOffset(size); 495 496 // Next the has_bits, which is an array of uint32s. 497 type_info->has_bits_offset = size; 498 int has_bits_array_size = 499 DivideRoundingUp(type->field_count(), bitsizeof(uint32)); 500 size += has_bits_array_size * sizeof(uint32); 501 size = AlignOffset(size); 502 503 // The ExtensionSet, if any. 504 if (type->extension_range_count() > 0) { 505 type_info->extensions_offset = size; 506 size += sizeof(ExtensionSet); 507 size = AlignOffset(size); 508 } else { 509 // No extensions. 510 type_info->extensions_offset = -1; 511 } 512 513 // All the fields. 514 for (int i = 0; i < type->field_count(); i++) { 515 // Make sure field is aligned to avoid bus errors. 516 int field_size = FieldSpaceUsed(type->field(i)); 517 size = AlignTo(size, min(kSafeAlignment, field_size)); 518 offsets[i] = size; 519 size += field_size; 520 } 521 522 // Add the UnknownFieldSet to the end. 523 size = AlignOffset(size); 524 type_info->unknown_fields_offset = size; 525 size += sizeof(UnknownFieldSet); 526 527 // Align the final size to make sure no clever allocators think that 528 // alignment is not necessary. 529 size = AlignOffset(size); 530 type_info->size = size; 531 532 // Allocate the prototype. 
533 void* base = operator new(size); 534 memset(base, 0, size); 535 DynamicMessage* prototype = new(base) DynamicMessage(type_info); 536 type_info->prototype.reset(prototype); 537 538 // Construct the reflection object. 539 type_info->reflection.reset( 540 new GeneratedMessageReflection( 541 type_info->type, 542 type_info->prototype.get(), 543 type_info->offsets.get(), 544 type_info->has_bits_offset, 545 type_info->unknown_fields_offset, 546 type_info->extensions_offset, 547 type_info->pool, 548 this, 549 type_info->size)); 550 551 // Cross link prototypes. 552 prototype->CrossLinkPrototypes(); 553 554 return prototype; 555 } 556 557 } // namespace protobuf 558 } // namespace google 559