Home | History | Annotate | Download | only in protobuf
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 //
     35 // DynamicMessage is implemented by constructing a data structure which
     36 // has roughly the same memory layout as a generated message would have.
     37 // Then, we use GeneratedMessageReflection to implement our reflection
     38 // interface.  All the other operations we need to implement (e.g.
     39 // parsing, copying, etc.) are already implemented in terms of
     40 // Reflection, so the rest is easy.
     41 //
     42 // The up side of this strategy is that it's very efficient.  We don't
     43 // need to use hash_maps or generic representations of fields.  The
     44 // down side is that this is a low-level memory management hack which
     45 // can be tricky to get right.
     46 //
     47 // As mentioned in the header, we only expose a DynamicMessageFactory
     48 // publicly, not the DynamicMessage class itself.  This is because
     49 // GeneratedMessageReflection wants to have a pointer to a "default"
     50 // copy of the class, with all fields initialized to their default
     51 // values.  We only want to construct one of these per message type,
     52 // so DynamicMessageFactory stores a cache of default messages for
     53 // each type it sees (each unique Descriptor pointer).  The code
     54 // refers to the "default" copy of the class as the "prototype".
     55 //
     56 // Note on memory allocation:  This module often calls "operator new()"
     57 // to allocate untyped memory, rather than calling something like
     58 // "new uint8[]".  This is because "operator new()" means "Give me some
     59 // space which I can use as I please." while "new uint8[]" means "Give
     60 // me an array of 8-bit integers.".  In practice, the latter may return
     61 // a pointer that is not aligned correctly for general use.  I believe
     62 // Item 8 of "More Effective C++" discusses this in more detail, though
     63 // I don't have the book on me right now so I'm not sure.
     64 
     65 #include <algorithm>
     66 #include <google/protobuf/stubs/hash.h>
     67 
     68 #include <google/protobuf/stubs/common.h>
     69 
     70 #include <google/protobuf/dynamic_message.h>
     71 #include <google/protobuf/descriptor.h>
     72 #include <google/protobuf/descriptor.pb.h>
     73 #include <google/protobuf/generated_message_util.h>
     74 #include <google/protobuf/generated_message_reflection.h>
     75 #include <google/protobuf/reflection_ops.h>
     76 #include <google/protobuf/repeated_field.h>
     77 #include <google/protobuf/extension_set.h>
     78 #include <google/protobuf/wire_format.h>
     79 
     80 namespace google {
     81 namespace protobuf {
     82 
     83 using internal::WireFormat;
     84 using internal::ExtensionSet;
     85 using internal::GeneratedMessageReflection;
     86 
     87 
     88 // ===================================================================
     89 // Some helper tables and functions...
     90 
     91 namespace {
     92 
     93 // Compute the byte size of the in-memory representation of the field.
     94 int FieldSpaceUsed(const FieldDescriptor* field) {
     95   typedef FieldDescriptor FD;  // avoid line wrapping
     96   if (field->label() == FD::LABEL_REPEATED) {
     97     switch (field->cpp_type()) {
     98       case FD::CPPTYPE_INT32  : return sizeof(RepeatedField<int32   >);
     99       case FD::CPPTYPE_INT64  : return sizeof(RepeatedField<int64   >);
    100       case FD::CPPTYPE_UINT32 : return sizeof(RepeatedField<uint32  >);
    101       case FD::CPPTYPE_UINT64 : return sizeof(RepeatedField<uint64  >);
    102       case FD::CPPTYPE_DOUBLE : return sizeof(RepeatedField<double  >);
    103       case FD::CPPTYPE_FLOAT  : return sizeof(RepeatedField<float   >);
    104       case FD::CPPTYPE_BOOL   : return sizeof(RepeatedField<bool    >);
    105       case FD::CPPTYPE_ENUM   : return sizeof(RepeatedField<int     >);
    106       case FD::CPPTYPE_MESSAGE: return sizeof(RepeatedPtrField<Message>);
    107 
    108       case FD::CPPTYPE_STRING:
    109         switch (field->options().ctype()) {
    110           default:  // TODO(kenton):  Support other string reps.
    111           case FieldOptions::STRING:
    112             return sizeof(RepeatedPtrField<string>);
    113         }
    114         break;
    115     }
    116   } else {
    117     switch (field->cpp_type()) {
    118       case FD::CPPTYPE_INT32  : return sizeof(int32   );
    119       case FD::CPPTYPE_INT64  : return sizeof(int64   );
    120       case FD::CPPTYPE_UINT32 : return sizeof(uint32  );
    121       case FD::CPPTYPE_UINT64 : return sizeof(uint64  );
    122       case FD::CPPTYPE_DOUBLE : return sizeof(double  );
    123       case FD::CPPTYPE_FLOAT  : return sizeof(float   );
    124       case FD::CPPTYPE_BOOL   : return sizeof(bool    );
    125       case FD::CPPTYPE_ENUM   : return sizeof(int     );
    126       case FD::CPPTYPE_MESSAGE: return sizeof(Message*);
    127 
    128       case FD::CPPTYPE_STRING:
    129         switch (field->options().ctype()) {
    130           default:  // TODO(kenton):  Support other string reps.
    131           case FieldOptions::STRING:
    132             return sizeof(string*);
    133         }
    134         break;
    135     }
    136   }
    137 
    138   GOOGLE_LOG(DFATAL) << "Can't get here.";
    139   return 0;
    140 }
    141 
    // Integer division of i by j, rounded up instead of truncated (for
    // non-negative i and positive j).
    inline int DivideRoundingUp(int i, int j) {
      const int bias = j - 1;  // Largest amount that must not bump the quotient.
      return (i + bias) / j;
    }
    145 
    146 static const int kSafeAlignment = sizeof(uint64);
    147 
    148 inline int AlignTo(int offset, int alignment) {
    149   return DivideRoundingUp(offset, alignment) * alignment;
    150 }
    151 
    152 // Rounds the given byte offset up to the next offset aligned such that any
    153 // type may be stored at it.
    154 inline int AlignOffset(int offset) {
    155   return AlignTo(offset, kSafeAlignment);
    156 }
    157 
    // Width of type T in bits, counting 8 bits per byte of sizeof(T).
    #define bitsizeof(T) (8 * sizeof(T))
    159 
    160 }  // namespace
    161 
    162 // ===================================================================
    163 
    164 class DynamicMessage : public Message {
    165  public:
    166   struct TypeInfo {
    167     int size;
    168     int has_bits_offset;
    169     int unknown_fields_offset;
    170     int extensions_offset;
    171 
    172     // Not owned by the TypeInfo.
    173     DynamicMessageFactory* factory;  // The factory that created this object.
    174     const DescriptorPool* pool;      // The factory's DescriptorPool.
    175     const Descriptor* type;          // Type of this DynamicMessage.
    176 
    177     // Warning:  The order in which the following pointers are defined is
    178     //   important (the prototype must be deleted *before* the offsets).
    179     scoped_array<int> offsets;
    180     scoped_ptr<const GeneratedMessageReflection> reflection;
    181     scoped_ptr<const DynamicMessage> prototype;
    182   };
    183 
    184   DynamicMessage(const TypeInfo* type_info);
    185   ~DynamicMessage();
    186 
    187   // Called on the prototype after construction to initialize message fields.
    188   void CrossLinkPrototypes();
    189 
    190   // implements Message ----------------------------------------------
    191 
    192   Message* New() const;
    193 
    194   int GetCachedSize() const;
    195   void SetCachedSize(int size) const;
    196 
    197   Metadata GetMetadata() const;
    198 
    199  private:
    200   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DynamicMessage);
    201 
    202   inline bool is_prototype() const {
    203     return type_info_->prototype == this ||
    204            // If type_info_->prototype is NULL, then we must be constructing
    205            // the prototype now, which means we must be the prototype.
    206            type_info_->prototype == NULL;
    207   }
    208 
    209   inline void* OffsetToPointer(int offset) {
    210     return reinterpret_cast<uint8*>(this) + offset;
    211   }
    212   inline const void* OffsetToPointer(int offset) const {
    213     return reinterpret_cast<const uint8*>(this) + offset;
    214   }
    215 
    216   const TypeInfo* type_info_;
    217 
    218   // TODO(kenton):  Make this an atomic<int> when C++ supports it.
    219   mutable int cached_byte_size_;
    220 };
    221 
    222 DynamicMessage::DynamicMessage(const TypeInfo* type_info)
    223   : type_info_(type_info),
    224     cached_byte_size_(0) {
    225   // We need to call constructors for various fields manually and set
    226   // default values where appropriate.  We use placement new to call
    227   // constructors.  If you haven't heard of placement new, I suggest Googling
    228   // it now.  We use placement new even for primitive types that don't have
    229   // constructors for consistency.  (In theory, placement new should be used
    230   // any time you are trying to convert untyped memory to typed memory, though
    231   // in practice that's not strictly necessary for types that don't have a
    232   // constructor.)
    233 
    234   const Descriptor* descriptor = type_info_->type;
    235 
    236   new(OffsetToPointer(type_info_->unknown_fields_offset)) UnknownFieldSet;
    237 
    238   if (type_info_->extensions_offset != -1) {
    239     new(OffsetToPointer(type_info_->extensions_offset)) ExtensionSet;
    240   }
    241 
    242   for (int i = 0; i < descriptor->field_count(); i++) {
    243     const FieldDescriptor* field = descriptor->field(i);
    244     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
    245     switch (field->cpp_type()) {
    246 #define HANDLE_TYPE(CPPTYPE, TYPE)                                           \
    247       case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
    248         if (!field->is_repeated()) {                                         \
    249           new(field_ptr) TYPE(field->default_value_##TYPE());                \
    250         } else {                                                             \
    251           new(field_ptr) RepeatedField<TYPE>();                              \
    252         }                                                                    \
    253         break;
    254 
    255       HANDLE_TYPE(INT32 , int32 );
    256       HANDLE_TYPE(INT64 , int64 );
    257       HANDLE_TYPE(UINT32, uint32);
    258       HANDLE_TYPE(UINT64, uint64);
    259       HANDLE_TYPE(DOUBLE, double);
    260       HANDLE_TYPE(FLOAT , float );
    261       HANDLE_TYPE(BOOL  , bool  );
    262 #undef HANDLE_TYPE
    263 
    264       case FieldDescriptor::CPPTYPE_ENUM:
    265         if (!field->is_repeated()) {
    266           new(field_ptr) int(field->default_value_enum()->number());
    267         } else {
    268           new(field_ptr) RepeatedField<int>();
    269         }
    270         break;
    271 
    272       case FieldDescriptor::CPPTYPE_STRING:
    273         switch (field->options().ctype()) {
    274           default:  // TODO(kenton):  Support other string reps.
    275           case FieldOptions::STRING:
    276             if (!field->is_repeated()) {
    277               if (is_prototype()) {
    278                 new(field_ptr) const string*(&field->default_value_string());
    279               } else {
    280                 string* default_value =
    281                   *reinterpret_cast<string* const*>(
    282                     type_info_->prototype->OffsetToPointer(
    283                       type_info_->offsets[i]));
    284                 new(field_ptr) string*(default_value);
    285               }
    286             } else {
    287               new(field_ptr) RepeatedPtrField<string>();
    288             }
    289             break;
    290         }
    291         break;
    292 
    293       case FieldDescriptor::CPPTYPE_MESSAGE: {
    294         if (!field->is_repeated()) {
    295           new(field_ptr) Message*(NULL);
    296         } else {
    297           new(field_ptr) RepeatedPtrField<Message>();
    298         }
    299         break;
    300       }
    301     }
    302   }
    303 }
    304 
    305 DynamicMessage::~DynamicMessage() {
    306   const Descriptor* descriptor = type_info_->type;
    307 
    308   reinterpret_cast<UnknownFieldSet*>(
    309     OffsetToPointer(type_info_->unknown_fields_offset))->~UnknownFieldSet();
    310 
    311   if (type_info_->extensions_offset != -1) {
    312     reinterpret_cast<ExtensionSet*>(
    313       OffsetToPointer(type_info_->extensions_offset))->~ExtensionSet();
    314   }
    315 
    316   // We need to manually run the destructors for repeated fields and strings,
    317   // just as we ran their constructors in the the DynamicMessage constructor.
    318   // Additionally, if any singular embedded messages have been allocated, we
    319   // need to delete them, UNLESS we are the prototype message of this type,
    320   // in which case any embedded messages are other prototypes and shouldn't
    321   // be touched.
    322   for (int i = 0; i < descriptor->field_count(); i++) {
    323     const FieldDescriptor* field = descriptor->field(i);
    324     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
    325 
    326     if (field->is_repeated()) {
    327       switch (field->cpp_type()) {
    328 #define HANDLE_TYPE(UPPERCASE, LOWERCASE)                                     \
    329         case FieldDescriptor::CPPTYPE_##UPPERCASE :                           \
    330           reinterpret_cast<RepeatedField<LOWERCASE>*>(field_ptr)              \
    331               ->~RepeatedField<LOWERCASE>();                                  \
    332           break
    333 
    334         HANDLE_TYPE( INT32,  int32);
    335         HANDLE_TYPE( INT64,  int64);
    336         HANDLE_TYPE(UINT32, uint32);
    337         HANDLE_TYPE(UINT64, uint64);
    338         HANDLE_TYPE(DOUBLE, double);
    339         HANDLE_TYPE( FLOAT,  float);
    340         HANDLE_TYPE(  BOOL,   bool);
    341         HANDLE_TYPE(  ENUM,    int);
    342 #undef HANDLE_TYPE
    343 
    344         case FieldDescriptor::CPPTYPE_STRING:
    345           switch (field->options().ctype()) {
    346             default:  // TODO(kenton):  Support other string reps.
    347             case FieldOptions::STRING:
    348               reinterpret_cast<RepeatedPtrField<string>*>(field_ptr)
    349                   ->~RepeatedPtrField<string>();
    350               break;
    351           }
    352           break;
    353 
    354         case FieldDescriptor::CPPTYPE_MESSAGE:
    355           reinterpret_cast<RepeatedPtrField<Message>*>(field_ptr)
    356               ->~RepeatedPtrField<Message>();
    357           break;
    358       }
    359 
    360     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
    361       switch (field->options().ctype()) {
    362         default:  // TODO(kenton):  Support other string reps.
    363         case FieldOptions::STRING: {
    364           string* ptr = *reinterpret_cast<string**>(field_ptr);
    365           if (ptr != &field->default_value_string()) {
    366             delete ptr;
    367           }
    368           break;
    369         }
    370       }
    371     } else if ((field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) &&
    372                !is_prototype()) {
    373       Message* message = *reinterpret_cast<Message**>(field_ptr);
    374       if (message != NULL) {
    375         delete message;
    376       }
    377     }
    378   }
    379 }
    380 
    381 void DynamicMessage::CrossLinkPrototypes() {
    382   // This should only be called on the prototype message.
    383   GOOGLE_CHECK(is_prototype());
    384 
    385   DynamicMessageFactory* factory = type_info_->factory;
    386   const Descriptor* descriptor = type_info_->type;
    387 
    388   // Cross-link default messages.
    389   for (int i = 0; i < descriptor->field_count(); i++) {
    390     const FieldDescriptor* field = descriptor->field(i);
    391     void* field_ptr = OffsetToPointer(type_info_->offsets[i]);
    392 
    393     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
    394         !field->is_repeated()) {
    395       // For fields with message types, we need to cross-link with the
    396       // prototype for the field's type.
    397       // For singular fields, the field is just a pointer which should
    398       // point to the prototype.
    399       *reinterpret_cast<const Message**>(field_ptr) =
    400         factory->GetPrototypeNoLock(field->message_type());
    401     }
    402   }
    403 }
    404 
    405 Message* DynamicMessage::New() const {
    406   void* new_base = reinterpret_cast<uint8*>(operator new(type_info_->size));
    407   memset(new_base, 0, type_info_->size);
    408   return new(new_base) DynamicMessage(type_info_);
    409 }
    410 
    411 int DynamicMessage::GetCachedSize() const {
    412   return cached_byte_size_;
    413 }
    414 
    415 void DynamicMessage::SetCachedSize(int size) const {
    416   // This is theoretically not thread-compatible, but in practice it works
    417   // because if multiple threads write this simultaneously, they will be
    418   // writing the exact same value.
    419   cached_byte_size_ = size;
    420 }
    421 
    422 Metadata DynamicMessage::GetMetadata() const {
    423   Metadata metadata;
    424   metadata.descriptor = type_info_->type;
    425   metadata.reflection = type_info_->reflection.get();
    426   return metadata;
    427 }
    428 
    429 // ===================================================================
    430 
    431 struct DynamicMessageFactory::PrototypeMap {
    432   typedef hash_map<const Descriptor*, const DynamicMessage::TypeInfo*> Map;
    433   Map map_;
    434 };
    435 
    436 DynamicMessageFactory::DynamicMessageFactory()
    437   : pool_(NULL), delegate_to_generated_factory_(false),
    438     prototypes_(new PrototypeMap) {
    439 }
    440 
    441 DynamicMessageFactory::DynamicMessageFactory(const DescriptorPool* pool)
    442   : pool_(pool), delegate_to_generated_factory_(false),
    443     prototypes_(new PrototypeMap) {
    444 }
    445 
    446 DynamicMessageFactory::~DynamicMessageFactory() {
    447   for (PrototypeMap::Map::iterator iter = prototypes_->map_.begin();
    448        iter != prototypes_->map_.end(); ++iter) {
    449     delete iter->second;
    450   }
    451 }
    452 
    453 const Message* DynamicMessageFactory::GetPrototype(const Descriptor* type) {
    454   MutexLock lock(&prototypes_mutex_);
    455   return GetPrototypeNoLock(type);
    456 }
    457 
    458 const Message* DynamicMessageFactory::GetPrototypeNoLock(
    459     const Descriptor* type) {
    460   if (delegate_to_generated_factory_ &&
    461       type->file()->pool() == DescriptorPool::generated_pool()) {
    462     return MessageFactory::generated_factory()->GetPrototype(type);
    463   }
    464 
    465   const DynamicMessage::TypeInfo** target = &prototypes_->map_[type];
    466   if (*target != NULL) {
    467     // Already exists.
    468     return (*target)->prototype.get();
    469   }
    470 
    471   DynamicMessage::TypeInfo* type_info = new DynamicMessage::TypeInfo;
    472   *target = type_info;
    473 
    474   type_info->type = type;
    475   type_info->pool = (pool_ == NULL) ? type->file()->pool() : pool_;
    476   type_info->factory = this;
    477 
    478   // We need to construct all the structures passed to
    479   // GeneratedMessageReflection's constructor.  This includes:
    480   // - A block of memory that contains space for all the message's fields.
    481   // - An array of integers indicating the byte offset of each field within
    482   //   this block.
    483   // - A big bitfield containing a bit for each field indicating whether
    484   //   or not that field is set.
    485 
    486   // Compute size and offsets.
    487   int* offsets = new int[type->field_count()];
    488   type_info->offsets.reset(offsets);
    489 
    490   // Decide all field offsets by packing in order.
    491   // We place the DynamicMessage object itself at the beginning of the allocated
    492   // space.
    493   int size = sizeof(DynamicMessage);
    494   size = AlignOffset(size);
    495 
    496   // Next the has_bits, which is an array of uint32s.
    497   type_info->has_bits_offset = size;
    498   int has_bits_array_size =
    499     DivideRoundingUp(type->field_count(), bitsizeof(uint32));
    500   size += has_bits_array_size * sizeof(uint32);
    501   size = AlignOffset(size);
    502 
    503   // The ExtensionSet, if any.
    504   if (type->extension_range_count() > 0) {
    505     type_info->extensions_offset = size;
    506     size += sizeof(ExtensionSet);
    507     size = AlignOffset(size);
    508   } else {
    509     // No extensions.
    510     type_info->extensions_offset = -1;
    511   }
    512 
    513   // All the fields.
    514   for (int i = 0; i < type->field_count(); i++) {
    515     // Make sure field is aligned to avoid bus errors.
    516     int field_size = FieldSpaceUsed(type->field(i));
    517     size = AlignTo(size, min(kSafeAlignment, field_size));
    518     offsets[i] = size;
    519     size += field_size;
    520   }
    521 
    522   // Add the UnknownFieldSet to the end.
    523   size = AlignOffset(size);
    524   type_info->unknown_fields_offset = size;
    525   size += sizeof(UnknownFieldSet);
    526 
    527   // Align the final size to make sure no clever allocators think that
    528   // alignment is not necessary.
    529   size = AlignOffset(size);
    530   type_info->size = size;
    531 
    532   // Allocate the prototype.
    533   void* base = operator new(size);
    534   memset(base, 0, size);
    535   DynamicMessage* prototype = new(base) DynamicMessage(type_info);
    536   type_info->prototype.reset(prototype);
    537 
    538   // Construct the reflection object.
    539   type_info->reflection.reset(
    540     new GeneratedMessageReflection(
    541       type_info->type,
    542       type_info->prototype.get(),
    543       type_info->offsets.get(),
    544       type_info->has_bits_offset,
    545       type_info->unknown_fields_offset,
    546       type_info->extensions_offset,
    547       type_info->pool,
    548       this,
    549       type_info->size));
    550 
    551   // Cross link prototypes.
    552   prototype->CrossLinkPrototypes();
    553 
    554   return prototype;
    555 }
    556 
    557 }  // namespace protobuf
    558 }  // namespace google
    559