Home | History | Annotate | Download | only in pyext
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: anuraag (at) google.com (Anuraag Agrawal)
     32 // Author: tibell (at) google.com (Johan Tibell)
     33 
     34 #include <google/protobuf/pyext/message.h>
     35 
     36 #include <map>
     37 #include <memory>
     38 #ifndef _SHARED_PTR_H
     39 #include <google/protobuf/stubs/shared_ptr.h>
     40 #endif
     41 #include <string>
     42 #include <vector>
     43 #include <structmember.h>  // A Python header file.
     44 
     45 #ifndef PyVarObject_HEAD_INIT
     46 #define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
     47 #endif
     48 #ifndef Py_TYPE
     49 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
     50 #endif
     51 #include <google/protobuf/descriptor.pb.h>
     52 #include <google/protobuf/stubs/common.h>
     53 #include <google/protobuf/stubs/logging.h>
     54 #include <google/protobuf/io/coded_stream.h>
     55 #include <google/protobuf/util/message_differencer.h>
     56 #include <google/protobuf/descriptor.h>
     57 #include <google/protobuf/message.h>
     58 #include <google/protobuf/text_format.h>
     59 #include <google/protobuf/unknown_field_set.h>
     60 #include <google/protobuf/pyext/descriptor.h>
     61 #include <google/protobuf/pyext/descriptor_pool.h>
     62 #include <google/protobuf/pyext/extension_dict.h>
     63 #include <google/protobuf/pyext/repeated_composite_container.h>
     64 #include <google/protobuf/pyext/repeated_scalar_container.h>
     65 #include <google/protobuf/pyext/map_container.h>
     66 #include <google/protobuf/pyext/scoped_pyobject_ptr.h>
     67 #include <google/protobuf/stubs/strutil.h>
     68 
     69 #if PY_MAJOR_VERSION >= 3
     70   #define PyInt_Check PyLong_Check
     71   #define PyInt_AsLong PyLong_AsLong
     72   #define PyInt_FromLong PyLong_FromLong
     73   #define PyInt_FromSize_t PyLong_FromSize_t
     74   #define PyString_Check PyUnicode_Check
     75   #define PyString_FromString PyUnicode_FromString
     76   #define PyString_FromStringAndSize PyUnicode_FromStringAndSize
     77   #if PY_VERSION_HEX < 0x03030000
     78     #error "Python 3.0 - 3.2 are not supported."
     79   #else
     80   #define PyString_AsString(ob) \
     81     (PyUnicode_Check(ob)? PyUnicode_AsUTF8(ob): PyBytes_AsString(ob))
     82   #define PyString_AsStringAndSize(ob, charpp, sizep) \
     83     (PyUnicode_Check(ob)? \
     84        ((*(charpp) = PyUnicode_AsUTF8AndSize(ob, (sizep))) == NULL? -1: 0): \
     85        PyBytes_AsStringAndSize(ob, (charpp), (sizep)))
     86   #endif
     87 #endif
     88 
     89 namespace google {
     90 namespace protobuf {
     91 namespace python {
     92 
     93 static PyObject* kDESCRIPTOR;
     94 static PyObject* k_extensions_by_name;
     95 static PyObject* k_extensions_by_number;
     96 PyObject* EnumTypeWrapper_class;
     97 static PyObject* PythonMessage_class;
     98 static PyObject* kEmptyWeakref;
     99 static PyObject* WKT_classes = NULL;
    100 
    101 namespace message_meta {
    102 
    103 static int InsertEmptyWeakref(PyTypeObject* base);
    104 
    105 // Add the number of a field descriptor to the containing message class.
    106 // Equivalent to:
    107 //   _cls.<field>_FIELD_NUMBER = <number>
    108 static bool AddFieldNumberToClass(
    109     PyObject* cls, const FieldDescriptor* field_descriptor) {
    110   string constant_name = field_descriptor->name() + "_FIELD_NUMBER";
    111   UpperString(&constant_name);
    112   ScopedPyObjectPtr attr_name(PyString_FromStringAndSize(
    113       constant_name.c_str(), constant_name.size()));
    114   if (attr_name == NULL) {
    115     return false;
    116   }
    117   ScopedPyObjectPtr number(PyInt_FromLong(field_descriptor->number()));
    118   if (number == NULL) {
    119     return false;
    120   }
    121   if (PyObject_SetAttr(cls, attr_name.get(), number.get()) == -1) {
    122     return false;
    123   }
    124   return true;
    125 }
    126 
    127 
    128 // Finalize the creation of the Message class.
    129 static int AddDescriptors(PyObject* cls, const Descriptor* descriptor) {
    130   // If there are extension_ranges, the message is "extendable", and extension
    131   // classes will register themselves in this class.
    132   if (descriptor->extension_range_count() > 0) {
    133     ScopedPyObjectPtr by_name(PyDict_New());
    134     if (PyObject_SetAttr(cls, k_extensions_by_name, by_name.get()) < 0) {
    135       return -1;
    136     }
    137     ScopedPyObjectPtr by_number(PyDict_New());
    138     if (PyObject_SetAttr(cls, k_extensions_by_number, by_number.get()) < 0) {
    139       return -1;
    140     }
    141   }
    142 
    143   // For each field set: cls.<field>_FIELD_NUMBER = <number>
    144   for (int i = 0; i < descriptor->field_count(); ++i) {
    145     if (!AddFieldNumberToClass(cls, descriptor->field(i))) {
    146       return -1;
    147     }
    148   }
    149 
    150   // For each enum set cls.<enum name> = EnumTypeWrapper(<enum descriptor>).
    151   for (int i = 0; i < descriptor->enum_type_count(); ++i) {
    152     const EnumDescriptor* enum_descriptor = descriptor->enum_type(i);
    153     ScopedPyObjectPtr enum_type(
    154         PyEnumDescriptor_FromDescriptor(enum_descriptor));
    155     if (enum_type == NULL) {
    156       return -1;
    157      }
    158     // Add wrapped enum type to message class.
    159     ScopedPyObjectPtr wrapped(PyObject_CallFunctionObjArgs(
    160         EnumTypeWrapper_class, enum_type.get(), NULL));
    161     if (wrapped == NULL) {
    162       return -1;
    163     }
    164     if (PyObject_SetAttrString(
    165             cls, enum_descriptor->name().c_str(), wrapped.get()) == -1) {
    166       return -1;
    167     }
    168 
    169     // For each enum value add cls.<name> = <number>
    170     for (int j = 0; j < enum_descriptor->value_count(); ++j) {
    171       const EnumValueDescriptor* enum_value_descriptor =
    172           enum_descriptor->value(j);
    173       ScopedPyObjectPtr value_number(PyInt_FromLong(
    174           enum_value_descriptor->number()));
    175       if (value_number == NULL) {
    176         return -1;
    177       }
    178       if (PyObject_SetAttrString(cls, enum_value_descriptor->name().c_str(),
    179                                  value_number.get()) == -1) {
    180         return -1;
    181       }
    182     }
    183   }
    184 
    185   // For each extension set cls.<extension name> = <extension descriptor>.
    186   //
    187   // Extension descriptors come from
    188   // <message descriptor>.extensions_by_name[name]
    189   // which was defined previously.
    190   for (int i = 0; i < descriptor->extension_count(); ++i) {
    191     const google::protobuf::FieldDescriptor* field = descriptor->extension(i);
    192     ScopedPyObjectPtr extension_field(PyFieldDescriptor_FromDescriptor(field));
    193     if (extension_field == NULL) {
    194       return -1;
    195     }
    196 
    197     // Add the extension field to the message class.
    198     if (PyObject_SetAttrString(
    199             cls, field->name().c_str(), extension_field.get()) == -1) {
    200       return -1;
    201     }
    202 
    203     // For each extension set cls.<extension name>_FIELD_NUMBER = <number>.
    204     if (!AddFieldNumberToClass(cls, field)) {
    205       return -1;
    206     }
    207   }
    208 
    209   return 0;
    210 }
    211 
    212 static PyObject* New(PyTypeObject* type,
    213                      PyObject* args, PyObject* kwargs) {
    214   static char *kwlist[] = {"name", "bases", "dict", 0};
    215   PyObject *bases, *dict;
    216   const char* name;
    217 
    218   // Check arguments: (name, bases, dict)
    219   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sO!O!:type", kwlist,
    220                                    &name,
    221                                    &PyTuple_Type, &bases,
    222                                    &PyDict_Type, &dict)) {
    223     return NULL;
    224   }
    225 
    226   // Check bases: only (), or (message.Message,) are allowed
    227   if (!(PyTuple_GET_SIZE(bases) == 0 ||
    228         (PyTuple_GET_SIZE(bases) == 1 &&
    229          PyTuple_GET_ITEM(bases, 0) == PythonMessage_class))) {
    230     PyErr_SetString(PyExc_TypeError,
    231                     "A Message class can only inherit from Message");
    232     return NULL;
    233   }
    234 
    235   // Check dict['DESCRIPTOR']
    236   PyObject* py_descriptor = PyDict_GetItem(dict, kDESCRIPTOR);
    237   if (py_descriptor == NULL) {
    238     PyErr_SetString(PyExc_TypeError, "Message class has no DESCRIPTOR");
    239     return NULL;
    240   }
    241   if (!PyObject_TypeCheck(py_descriptor, &PyMessageDescriptor_Type)) {
    242     PyErr_Format(PyExc_TypeError, "Expected a message Descriptor, got %s",
    243                  py_descriptor->ob_type->tp_name);
    244     return NULL;
    245   }
    246 
    247   // Build the arguments to the base metaclass.
    248   // We change the __bases__ classes.
    249   ScopedPyObjectPtr new_args;
    250   const Descriptor* message_descriptor =
    251       PyMessageDescriptor_AsDescriptor(py_descriptor);
    252   if (message_descriptor == NULL) {
    253     return NULL;
    254   }
    255 
    256   if (WKT_classes == NULL) {
    257     ScopedPyObjectPtr well_known_types(PyImport_ImportModule(
    258         "google.protobuf.internal.well_known_types"));
    259     GOOGLE_DCHECK(well_known_types != NULL);
    260 
    261     WKT_classes = PyObject_GetAttrString(well_known_types.get(), "WKTBASES");
    262     GOOGLE_DCHECK(WKT_classes != NULL);
    263   }
    264 
    265   PyObject* well_known_class = PyDict_GetItemString(
    266       WKT_classes, message_descriptor->full_name().c_str());
    267   if (well_known_class == NULL) {
    268     new_args.reset(Py_BuildValue("s(OO)O", name, &CMessage_Type,
    269                                  PythonMessage_class, dict));
    270   } else {
    271     new_args.reset(Py_BuildValue("s(OOO)O", name, &CMessage_Type,
    272                                  PythonMessage_class, well_known_class, dict));
    273   }
    274 
    275   if (new_args == NULL) {
    276     return NULL;
    277   }
    278   // Call the base metaclass.
    279   ScopedPyObjectPtr result(PyType_Type.tp_new(type, new_args.get(), NULL));
    280   if (result == NULL) {
    281     return NULL;
    282   }
    283   CMessageClass* newtype = reinterpret_cast<CMessageClass*>(result.get());
    284 
    285   // Insert the empty weakref into the base classes.
    286   if (InsertEmptyWeakref(
    287           reinterpret_cast<PyTypeObject*>(PythonMessage_class)) < 0 ||
    288       InsertEmptyWeakref(&CMessage_Type) < 0) {
    289     return NULL;
    290   }
    291 
    292   // Cache the descriptor, both as Python object and as C++ pointer.
    293   const Descriptor* descriptor =
    294       PyMessageDescriptor_AsDescriptor(py_descriptor);
    295   if (descriptor == NULL) {
    296     return NULL;
    297   }
    298   Py_INCREF(py_descriptor);
    299   newtype->py_message_descriptor = py_descriptor;
    300   newtype->message_descriptor = descriptor;
    301   // TODO(amauryfa): Don't always use the canonical pool of the descriptor,
    302   // use the MessageFactory optionally passed in the class dict.
    303   newtype->py_descriptor_pool = GetDescriptorPool_FromPool(
    304       descriptor->file()->pool());
    305   if (newtype->py_descriptor_pool == NULL) {
    306     return NULL;
    307   }
    308   Py_INCREF(newtype->py_descriptor_pool);
    309 
    310   // Add the message to the DescriptorPool.
    311   if (cdescriptor_pool::RegisterMessageClass(newtype->py_descriptor_pool,
    312                                              descriptor, newtype) < 0) {
    313     return NULL;
    314   }
    315 
    316   // Continue with type initialization: add other descriptors, enum values...
    317   if (AddDescriptors(result.get(), descriptor) < 0) {
    318     return NULL;
    319   }
    320   return result.release();
    321 }
    322 
    323 static void Dealloc(CMessageClass *self) {
    324   Py_DECREF(self->py_message_descriptor);
    325   Py_DECREF(self->py_descriptor_pool);
    326   Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
    327 }
    328 
    329 
    330 // This function inserts and empty weakref at the end of the list of
    331 // subclasses for the main protocol buffer Message class.
    332 //
    333 // This eliminates a O(n^2) behaviour in the internal add_subclass
    334 // routine.
    335 static int InsertEmptyWeakref(PyTypeObject *base_type) {
    336 #if PY_MAJOR_VERSION >= 3
    337   // Python 3.4 has already included the fix for the issue that this
    338   // hack addresses. For further background and the fix please see
    339   // https://bugs.python.org/issue17936.
    340   return 0;
    341 #else
    342   PyObject *subclasses = base_type->tp_subclasses;
    343   if (subclasses && PyList_CheckExact(subclasses)) {
    344     return PyList_Append(subclasses, kEmptyWeakref);
    345   }
    346   return 0;
    347 #endif  // PY_MAJOR_VERSION >= 3
    348 }
    349 
    350 }  // namespace message_meta
    351 
    352 PyTypeObject CMessageClass_Type = {
    353   PyVarObject_HEAD_INIT(&PyType_Type, 0)
    354   FULL_MODULE_NAME ".MessageMeta",     // tp_name
    355   sizeof(CMessageClass),               // tp_basicsize
    356   0,                                   // tp_itemsize
    357   (destructor)message_meta::Dealloc,   // tp_dealloc
    358   0,                                   // tp_print
    359   0,                                   // tp_getattr
    360   0,                                   // tp_setattr
    361   0,                                   // tp_compare
    362   0,                                   // tp_repr
    363   0,                                   // tp_as_number
    364   0,                                   // tp_as_sequence
    365   0,                                   // tp_as_mapping
    366   0,                                   // tp_hash
    367   0,                                   // tp_call
    368   0,                                   // tp_str
    369   0,                                   // tp_getattro
    370   0,                                   // tp_setattro
    371   0,                                   // tp_as_buffer
    372   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  // tp_flags
    373   "The metaclass of ProtocolMessages",  // tp_doc
    374   0,                                   // tp_traverse
    375   0,                                   // tp_clear
    376   0,                                   // tp_richcompare
    377   0,                                   // tp_weaklistoffset
    378   0,                                   // tp_iter
    379   0,                                   // tp_iternext
    380   0,                                   // tp_methods
    381   0,                                   // tp_members
    382   0,                                   // tp_getset
    383   0,                                   // tp_base
    384   0,                                   // tp_dict
    385   0,                                   // tp_descr_get
    386   0,                                   // tp_descr_set
    387   0,                                   // tp_dictoffset
    388   0,                                   // tp_init
    389   0,                                   // tp_alloc
    390   message_meta::New,                   // tp_new
    391 };
    392 
    393 static CMessageClass* CheckMessageClass(PyTypeObject* cls) {
    394   if (!PyObject_TypeCheck(cls, &CMessageClass_Type)) {
    395     PyErr_Format(PyExc_TypeError, "Class %s is not a Message", cls->tp_name);
    396     return NULL;
    397   }
    398   return reinterpret_cast<CMessageClass*>(cls);
    399 }
    400 
    401 static const Descriptor* GetMessageDescriptor(PyTypeObject* cls) {
    402   CMessageClass* type = CheckMessageClass(cls);
    403   if (type == NULL) {
    404     return NULL;
    405   }
    406   return type->message_descriptor;
    407 }
    408 
    409 // Forward declarations
    410 namespace cmessage {
    411 int InternalReleaseFieldByDescriptor(
    412     CMessage* self,
    413     const FieldDescriptor* field_descriptor,
    414     PyObject* composite_field);
    415 }  // namespace cmessage
    416 
    417 // ---------------------------------------------------------------------
    418 // Visiting the composite children of a CMessage
    419 
    420 struct ChildVisitor {
    421   // Returns 0 on success, -1 on failure.
    422   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
    423     return 0;
    424   }
    425 
    426   // Returns 0 on success, -1 on failure.
    427   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
    428     return 0;
    429   }
    430 
    431   // Returns 0 on success, -1 on failure.
    432   int VisitCMessage(CMessage* cmessage,
    433                     const FieldDescriptor* field_descriptor) {
    434     return 0;
    435   }
    436 };
    437 
    438 // Apply a function to a composite field.  Does nothing if child is of
    439 // non-composite type.
    440 template<class Visitor>
    441 static int VisitCompositeField(const FieldDescriptor* descriptor,
    442                                PyObject* child,
    443                                Visitor visitor) {
    444   if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
    445     if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    446       if (descriptor->is_map()) {
    447         MapContainer* container = reinterpret_cast<MapContainer*>(child);
    448         if (visitor.VisitMapContainer(container) == -1) {
    449           return -1;
    450         }
    451       } else {
    452         RepeatedCompositeContainer* container =
    453           reinterpret_cast<RepeatedCompositeContainer*>(child);
    454         if (visitor.VisitRepeatedCompositeContainer(container) == -1)
    455           return -1;
    456       }
    457     } else {
    458       RepeatedScalarContainer* container =
    459         reinterpret_cast<RepeatedScalarContainer*>(child);
    460       if (visitor.VisitRepeatedScalarContainer(container) == -1)
    461         return -1;
    462     }
    463   } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    464     CMessage* cmsg = reinterpret_cast<CMessage*>(child);
    465     if (visitor.VisitCMessage(cmsg, descriptor) == -1)
    466       return -1;
    467   }
    468   // The ExtensionDict might contain non-composite fields, which we
    469   // skip here.
    470   return 0;
    471 }
    472 
    473 // Visit each composite field and extension field of this CMessage.
    474 // Returns -1 on error and 0 on success.
    475 template<class Visitor>
    476 int ForEachCompositeField(CMessage* self, Visitor visitor) {
    477   Py_ssize_t pos = 0;
    478   PyObject* key;
    479   PyObject* field;
    480 
    481   // Visit normal fields.
    482   if (self->composite_fields) {
    483     // Never use self->message in this function, it may be already freed.
    484     const Descriptor* message_descriptor =
    485         GetMessageDescriptor(Py_TYPE(self));
    486     while (PyDict_Next(self->composite_fields, &pos, &key, &field)) {
    487       Py_ssize_t key_str_size;
    488       char *key_str_data;
    489       if (PyString_AsStringAndSize(key, &key_str_data, &key_str_size) != 0)
    490         return -1;
    491       const string key_str(key_str_data, key_str_size);
    492       const FieldDescriptor* descriptor =
    493         message_descriptor->FindFieldByName(key_str);
    494       if (descriptor != NULL) {
    495         if (VisitCompositeField(descriptor, field, visitor) == -1)
    496           return -1;
    497       }
    498     }
    499   }
    500 
    501   // Visit extension fields.
    502   if (self->extensions != NULL) {
    503     pos = 0;
    504     while (PyDict_Next(self->extensions->values, &pos, &key, &field)) {
    505       const FieldDescriptor* descriptor = cmessage::GetExtensionDescriptor(key);
    506       if (descriptor == NULL)
    507         return -1;
    508       if (VisitCompositeField(descriptor, field, visitor) == -1)
    509         return -1;
    510     }
    511   }
    512 
    513   return 0;
    514 }
    515 
    516 // ---------------------------------------------------------------------
    517 
    518 // Constants used for integer type range checking.
    519 PyObject* kPythonZero;
    520 PyObject* kint32min_py;
    521 PyObject* kint32max_py;
    522 PyObject* kuint32max_py;
    523 PyObject* kint64min_py;
    524 PyObject* kint64max_py;
    525 PyObject* kuint64max_py;
    526 
    527 PyObject* EncodeError_class;
    528 PyObject* DecodeError_class;
    529 PyObject* PickleError_class;
    530 
    531 // Constant PyString values used for GetAttr/GetItem.
    532 static PyObject* k_cdescriptor;
    533 static PyObject* kfull_name;
    534 
    535 /* Is 64bit */
    536 void FormatTypeError(PyObject* arg, char* expected_types) {
    537   PyObject* repr = PyObject_Repr(arg);
    538   if (repr) {
    539     PyErr_Format(PyExc_TypeError,
    540                  "%.100s has type %.100s, but expected one of: %s",
    541                  PyString_AsString(repr),
    542                  Py_TYPE(arg)->tp_name,
    543                  expected_types);
    544     Py_DECREF(repr);
    545   }
    546 }
    547 
    548 template<class T>
    549 bool CheckAndGetInteger(
    550     PyObject* arg, T* value, PyObject* min, PyObject* max) {
    551   bool is_long = PyLong_Check(arg);
    552 #if PY_MAJOR_VERSION < 3
    553   if (!PyInt_Check(arg) && !is_long) {
    554     FormatTypeError(arg, "int, long");
    555     return false;
    556   }
    557   if (PyObject_Compare(min, arg) > 0 || PyObject_Compare(max, arg) < 0) {
    558 #else
    559   if (!is_long) {
    560     FormatTypeError(arg, "int");
    561     return false;
    562   }
    563   if (PyObject_RichCompareBool(min, arg, Py_LE) != 1 ||
    564       PyObject_RichCompareBool(max, arg, Py_GE) != 1) {
    565 #endif
    566     if (!PyErr_Occurred()) {
    567       PyObject *s = PyObject_Str(arg);
    568       if (s) {
    569         PyErr_Format(PyExc_ValueError,
    570                      "Value out of range: %s",
    571                      PyString_AsString(s));
    572         Py_DECREF(s);
    573       }
    574     }
    575     return false;
    576   }
    577 #if PY_MAJOR_VERSION < 3
    578   if (!is_long) {
    579     *value = static_cast<T>(PyInt_AsLong(arg));
    580   } else  // NOLINT
    581 #endif
    582   {
    583     if (min == kPythonZero) {
    584       *value = static_cast<T>(PyLong_AsUnsignedLongLong(arg));
    585     } else {
    586       *value = static_cast<T>(PyLong_AsLongLong(arg));
    587     }
    588   }
    589   return true;
    590 }
    591 
    592 // These are referenced by repeated_scalar_container, and must
    593 // be explicitly instantiated.
    594 template bool CheckAndGetInteger<int32>(
    595     PyObject*, int32*, PyObject*, PyObject*);
    596 template bool CheckAndGetInteger<int64>(
    597     PyObject*, int64*, PyObject*, PyObject*);
    598 template bool CheckAndGetInteger<uint32>(
    599     PyObject*, uint32*, PyObject*, PyObject*);
    600 template bool CheckAndGetInteger<uint64>(
    601     PyObject*, uint64*, PyObject*, PyObject*);
    602 
    603 bool CheckAndGetDouble(PyObject* arg, double* value) {
    604   if (!PyInt_Check(arg) && !PyLong_Check(arg) &&
    605       !PyFloat_Check(arg)) {
    606     FormatTypeError(arg, "int, long, float");
    607     return false;
    608   }
    609   *value = PyFloat_AsDouble(arg);
    610   return true;
    611 }
    612 
    613 bool CheckAndGetFloat(PyObject* arg, float* value) {
    614   double double_value;
    615   if (!CheckAndGetDouble(arg, &double_value)) {
    616     return false;
    617   }
    618   *value = static_cast<float>(double_value);
    619   return true;
    620 }
    621 
    622 bool CheckAndGetBool(PyObject* arg, bool* value) {
    623   if (!PyInt_Check(arg) && !PyBool_Check(arg) && !PyLong_Check(arg)) {
    624     FormatTypeError(arg, "int, long, bool");
    625     return false;
    626   }
    627   *value = static_cast<bool>(PyInt_AsLong(arg));
    628   return true;
    629 }
    630 
    631 // Checks whether the given object (which must be "bytes" or "unicode") contains
    632 // valid UTF-8.
    633 bool IsValidUTF8(PyObject* obj) {
    634   if (PyBytes_Check(obj)) {
    635     PyObject* unicode = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
    636 
    637     // Clear the error indicator; we report our own error when desired.
    638     PyErr_Clear();
    639 
    640     if (unicode) {
    641       Py_DECREF(unicode);
    642       return true;
    643     } else {
    644       return false;
    645     }
    646   } else {
    647     // Unicode object, known to be valid UTF-8.
    648     return true;
    649   }
    650 }
    651 
    652 bool AllowInvalidUTF8(const FieldDescriptor* field) { return false; }
    653 
    654 PyObject* CheckString(PyObject* arg, const FieldDescriptor* descriptor) {
    655   GOOGLE_DCHECK(descriptor->type() == FieldDescriptor::TYPE_STRING ||
    656          descriptor->type() == FieldDescriptor::TYPE_BYTES);
    657   if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
    658     if (!PyBytes_Check(arg) && !PyUnicode_Check(arg)) {
    659       FormatTypeError(arg, "bytes, unicode");
    660       return NULL;
    661     }
    662 
    663     if (!IsValidUTF8(arg) && !AllowInvalidUTF8(descriptor)) {
    664       PyObject* repr = PyObject_Repr(arg);
    665       PyErr_Format(PyExc_ValueError,
    666                    "%s has type str, but isn't valid UTF-8 "
    667                    "encoding. Non-UTF-8 strings must be converted to "
    668                    "unicode objects before being added.",
    669                    PyString_AsString(repr));
    670       Py_DECREF(repr);
    671       return NULL;
    672     }
    673   } else if (!PyBytes_Check(arg)) {
    674     FormatTypeError(arg, "bytes");
    675     return NULL;
    676   }
    677 
    678   PyObject* encoded_string = NULL;
    679   if (descriptor->type() == FieldDescriptor::TYPE_STRING) {
    680     if (PyBytes_Check(arg)) {
    681       // The bytes were already validated as correctly encoded UTF-8 above.
    682       encoded_string = arg;  // Already encoded.
    683       Py_INCREF(encoded_string);
    684     } else {
    685       encoded_string = PyUnicode_AsEncodedObject(arg, "utf-8", NULL);
    686     }
    687   } else {
    688     // In this case field type is "bytes".
    689     encoded_string = arg;
    690     Py_INCREF(encoded_string);
    691   }
    692 
    693   return encoded_string;
    694 }
    695 
    696 bool CheckAndSetString(
    697     PyObject* arg, Message* message,
    698     const FieldDescriptor* descriptor,
    699     const Reflection* reflection,
    700     bool append,
    701     int index) {
    702   ScopedPyObjectPtr encoded_string(CheckString(arg, descriptor));
    703 
    704   if (encoded_string.get() == NULL) {
    705     return false;
    706   }
    707 
    708   char* value;
    709   Py_ssize_t value_len;
    710   if (PyBytes_AsStringAndSize(encoded_string.get(), &value, &value_len) < 0) {
    711     return false;
    712   }
    713 
    714   string value_string(value, value_len);
    715   if (append) {
    716     reflection->AddString(message, descriptor, value_string);
    717   } else if (index < 0) {
    718     reflection->SetString(message, descriptor, value_string);
    719   } else {
    720     reflection->SetRepeatedString(message, descriptor, index, value_string);
    721   }
    722   return true;
    723 }
    724 
    725 PyObject* ToStringObject(const FieldDescriptor* descriptor, string value) {
    726   if (descriptor->type() != FieldDescriptor::TYPE_STRING) {
    727     return PyBytes_FromStringAndSize(value.c_str(), value.length());
    728   }
    729 
    730   PyObject* result = PyUnicode_DecodeUTF8(value.c_str(), value.length(), NULL);
    731   // If the string can't be decoded in UTF-8, just return a string object that
    732   // contains the raw bytes. This can't happen if the value was assigned using
    733   // the members of the Python message object, but can happen if the values were
    734   // parsed from the wire (binary).
    735   if (result == NULL) {
    736     PyErr_Clear();
    737     result = PyBytes_FromStringAndSize(value.c_str(), value.length());
    738   }
    739   return result;
    740 }
    741 
    742 bool CheckFieldBelongsToMessage(const FieldDescriptor* field_descriptor,
    743                                 const Message* message) {
    744   if (message->GetDescriptor() == field_descriptor->containing_type()) {
    745     return true;
    746   }
    747   PyErr_Format(PyExc_KeyError, "Field '%s' does not belong to message '%s'",
    748                field_descriptor->full_name().c_str(),
    749                message->GetDescriptor()->full_name().c_str());
    750   return false;
    751 }
    752 
    753 namespace cmessage {
    754 
    755 PyDescriptorPool* GetDescriptorPoolForMessage(CMessage* message) {
    756   // No need to check the type: the type of instances of CMessage is always
    757   // an instance of CMessageClass. Let's prove it with a debug-only check.
    758   GOOGLE_DCHECK(PyObject_TypeCheck(message, &CMessage_Type));
    759   return reinterpret_cast<CMessageClass*>(Py_TYPE(message))->py_descriptor_pool;
    760 }
    761 
    762 MessageFactory* GetFactoryForMessage(CMessage* message) {
    763   return GetDescriptorPoolForMessage(message)->message_factory;
    764 }
    765 
    766 static int MaybeReleaseOverlappingOneofField(
    767     CMessage* cmessage,
    768     const FieldDescriptor* field) {
    769 #ifdef GOOGLE_PROTOBUF_HAS_ONEOF
    770   Message* message = cmessage->message;
    771   const Reflection* reflection = message->GetReflection();
    772   if (!field->containing_oneof() ||
    773       !reflection->HasOneof(*message, field->containing_oneof()) ||
    774       reflection->HasField(*message, field)) {
    775     // No other field in this oneof, no need to release.
    776     return 0;
    777   }
    778 
    779   const OneofDescriptor* oneof = field->containing_oneof();
    780   const FieldDescriptor* existing_field =
    781       reflection->GetOneofFieldDescriptor(*message, oneof);
    782   if (existing_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
    783     // Non-message fields don't need to be released.
    784     return 0;
    785   }
    786   const char* field_name = existing_field->name().c_str();
    787   PyObject* child_message = cmessage->composite_fields ?
    788       PyDict_GetItemString(cmessage->composite_fields, field_name) : NULL;
    789   if (child_message == NULL) {
    790     // No python reference to this field so no need to release.
    791     return 0;
    792   }
    793 
    794   if (InternalReleaseFieldByDescriptor(
    795           cmessage, existing_field, child_message) < 0) {
    796     return -1;
    797   }
    798   return PyDict_DelItemString(cmessage->composite_fields, field_name);
    799 #else
    800   return 0;
    801 #endif
    802 }
    803 
    804 // ---------------------------------------------------------------------
    805 // Making a message writable
    806 
    807 static Message* GetMutableMessage(
    808     CMessage* parent,
    809     const FieldDescriptor* parent_field) {
    810   Message* parent_message = parent->message;
    811   const Reflection* reflection = parent_message->GetReflection();
    812   if (MaybeReleaseOverlappingOneofField(parent, parent_field) < 0) {
    813     return NULL;
    814   }
    815   return reflection->MutableMessage(
    816       parent_message, parent_field, GetFactoryForMessage(parent));
    817 }
    818 
    819 struct FixupMessageReference : public ChildVisitor {
    820   // message must outlive this object.
    821   explicit FixupMessageReference(Message* message) :
    822       message_(message) {}
    823 
    824   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
    825     container->message = message_;
    826     return 0;
    827   }
    828 
    829   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
    830     container->message = message_;
    831     return 0;
    832   }
    833 
    834   int VisitMapContainer(MapContainer* container) {
    835     container->message = message_;
    836     return 0;
    837   }
    838 
    839  private:
    840   Message* message_;
    841 };
    842 
    843 int AssureWritable(CMessage* self) {
    844   if (self == NULL || !self->read_only) {
    845     return 0;
    846   }
    847 
    848   if (self->parent == NULL) {
    849     // If parent is NULL but we are trying to modify a read-only message, this
    850     // is a reference to a constant default instance that needs to be replaced
    851     // with a mutable top-level message.
    852     self->message = self->message->New();
    853     self->owner.reset(self->message);
    854     // Cascade the new owner to eventual children: even if this message is
    855     // empty, some submessages or repeated containers might exist already.
    856     SetOwner(self, self->owner);
    857   } else {
    858     // Otherwise, we need a mutable child message.
    859     if (AssureWritable(self->parent) == -1)
    860       return -1;
    861 
    862     // Make self->message writable.
    863     Message* mutable_message = GetMutableMessage(
    864         self->parent,
    865         self->parent_field_descriptor);
    866     if (mutable_message == NULL) {
    867       return -1;
    868     }
    869     self->message = mutable_message;
    870   }
    871   self->read_only = false;
    872 
    873   // When a CMessage is made writable its Message pointer is updated
    874   // to point to a new mutable Message.  When that happens we need to
    875   // update any references to the old, read-only CMessage.  There are
    876   // four places such references occur: RepeatedScalarContainer,
    877   // RepeatedCompositeContainer, MapContainer, and ExtensionDict.
    878   if (self->extensions != NULL)
    879     self->extensions->message = self->message;
    880   if (ForEachCompositeField(self, FixupMessageReference(self->message)) == -1)
    881     return -1;
    882 
    883   return 0;
    884 }
    885 
    886 // --- Globals:
    887 
    888 // Retrieve a C++ FieldDescriptor for a message attribute.
    889 // The C++ message must be valid.
    890 // TODO(amauryfa): This function should stay internal, because exception
    891 // handling is not consistent.
    892 static const FieldDescriptor* GetFieldDescriptor(
    893     CMessage* self, PyObject* name) {
    894   const Descriptor *message_descriptor = self->message->GetDescriptor();
    895   char* field_name;
    896   Py_ssize_t size;
    897   if (PyString_AsStringAndSize(name, &field_name, &size) < 0) {
    898     return NULL;
    899   }
    900   const FieldDescriptor *field_descriptor =
    901       message_descriptor->FindFieldByName(string(field_name, size));
    902   if (field_descriptor == NULL) {
    903     // Note: No exception is set!
    904     return NULL;
    905   }
    906   return field_descriptor;
    907 }
    908 
    909 // Retrieve a C++ FieldDescriptor for an extension handle.
    910 const FieldDescriptor* GetExtensionDescriptor(PyObject* extension) {
    911   ScopedPyObjectPtr cdescriptor;
    912   if (!PyObject_TypeCheck(extension, &PyFieldDescriptor_Type)) {
    913     // Most callers consider extensions as a plain dictionary.  We should
    914     // allow input which is not a field descriptor, and simply pretend it does
    915     // not exist.
    916     PyErr_SetObject(PyExc_KeyError, extension);
    917     return NULL;
    918   }
    919   return PyFieldDescriptor_AsDescriptor(extension);
    920 }
    921 
    922 // If value is a string, convert it into an enum value based on the labels in
    923 // descriptor, otherwise simply return value.  Always returns a new reference.
    924 static PyObject* GetIntegerEnumValue(const FieldDescriptor& descriptor,
    925                                      PyObject* value) {
    926   if (PyString_Check(value) || PyUnicode_Check(value)) {
    927     const EnumDescriptor* enum_descriptor = descriptor.enum_type();
    928     if (enum_descriptor == NULL) {
    929       PyErr_SetString(PyExc_TypeError, "not an enum field");
    930       return NULL;
    931     }
    932     char* enum_label;
    933     Py_ssize_t size;
    934     if (PyString_AsStringAndSize(value, &enum_label, &size) < 0) {
    935       return NULL;
    936     }
    937     const EnumValueDescriptor* enum_value_descriptor =
    938         enum_descriptor->FindValueByName(string(enum_label, size));
    939     if (enum_value_descriptor == NULL) {
    940       PyErr_SetString(PyExc_ValueError, "unknown enum label");
    941       return NULL;
    942     }
    943     return PyInt_FromLong(enum_value_descriptor->number());
    944   }
    945   Py_INCREF(value);
    946   return value;
    947 }
    948 
    949 // If cmessage_list is not NULL, this function releases values into the
    950 // container CMessages instead of just removing. Repeated composite container
    951 // needs to do this to make sure CMessages stay alive if they're still
    952 // referenced after deletion. Repeated scalar container doesn't need to worry.
    953 int InternalDeleteRepeatedField(
    954     CMessage* self,
    955     const FieldDescriptor* field_descriptor,
    956     PyObject* slice,
    957     PyObject* cmessage_list) {
    958   Message* message = self->message;
    959   Py_ssize_t length, from, to, step, slice_length;
    960   const Reflection* reflection = message->GetReflection();
    961   int min, max;
    962   length = reflection->FieldSize(*message, field_descriptor);
    963 
    964   if (PyInt_Check(slice) || PyLong_Check(slice)) {
    965     from = to = PyLong_AsLong(slice);
    966     if (from < 0) {
    967       from = to = length + from;
    968     }
    969     step = 1;
    970     min = max = from;
    971 
    972     // Range check.
    973     if (from < 0 || from >= length) {
    974       PyErr_Format(PyExc_IndexError, "list assignment index out of range");
    975       return -1;
    976     }
    977   } else if (PySlice_Check(slice)) {
    978     from = to = step = slice_length = 0;
    979     PySlice_GetIndicesEx(
    980 #if PY_MAJOR_VERSION < 3
    981         reinterpret_cast<PySliceObject*>(slice),
    982 #else
    983         slice,
    984 #endif
    985         length, &from, &to, &step, &slice_length);
    986     if (from < to) {
    987       min = from;
    988       max = to - 1;
    989     } else {
    990       min = to + 1;
    991       max = from;
    992     }
    993   } else {
    994     PyErr_SetString(PyExc_TypeError, "list indices must be integers");
    995     return -1;
    996   }
    997 
    998   Py_ssize_t i = from;
    999   std::vector<bool> to_delete(length, false);
   1000   while (i >= min && i <= max) {
   1001     to_delete[i] = true;
   1002     i += step;
   1003   }
   1004 
   1005   to = 0;
   1006   for (i = 0; i < length; ++i) {
   1007     if (!to_delete[i]) {
   1008       if (i != to) {
   1009         reflection->SwapElements(message, field_descriptor, i, to);
   1010         if (cmessage_list != NULL) {
   1011           // If a list of cmessages is passed in (i.e. from a repeated
   1012           // composite container), swap those as well to correspond to the
   1013           // swaps in the underlying message so they're in the right order
   1014           // when we start releasing.
   1015           PyObject* tmp = PyList_GET_ITEM(cmessage_list, i);
   1016           PyList_SET_ITEM(cmessage_list, i,
   1017                           PyList_GET_ITEM(cmessage_list, to));
   1018           PyList_SET_ITEM(cmessage_list, to, tmp);
   1019         }
   1020       }
   1021       ++to;
   1022     }
   1023   }
   1024 
   1025   while (i > to) {
   1026     if (cmessage_list == NULL) {
   1027       reflection->RemoveLast(message, field_descriptor);
   1028     } else {
   1029       CMessage* last_cmessage = reinterpret_cast<CMessage*>(
   1030           PyList_GET_ITEM(cmessage_list, PyList_GET_SIZE(cmessage_list) - 1));
   1031       repeated_composite_container::ReleaseLastTo(
   1032           self, field_descriptor, last_cmessage);
   1033       if (PySequence_DelItem(cmessage_list, -1) < 0) {
   1034         return -1;
   1035       }
   1036     }
   1037     --i;
   1038   }
   1039 
   1040   return 0;
   1041 }
   1042 
   1043 // Initializes fields of a message. Used in constructors.
   1044 int InitAttributes(CMessage* self, PyObject* kwargs) {
   1045   if (kwargs == NULL) {
   1046     return 0;
   1047   }
   1048 
   1049   Py_ssize_t pos = 0;
   1050   PyObject* name;
   1051   PyObject* value;
   1052   while (PyDict_Next(kwargs, &pos, &name, &value)) {
   1053     if (!PyString_Check(name)) {
   1054       PyErr_SetString(PyExc_ValueError, "Field name must be a string");
   1055       return -1;
   1056     }
   1057     const FieldDescriptor* descriptor = GetFieldDescriptor(self, name);
   1058     if (descriptor == NULL) {
   1059       PyErr_Format(PyExc_ValueError, "Protocol message %s has no \"%s\" field.",
   1060                    self->message->GetDescriptor()->name().c_str(),
   1061                    PyString_AsString(name));
   1062       return -1;
   1063     }
   1064     if (value == Py_None) {
   1065       // field=None is the same as no field at all.
   1066       continue;
   1067     }
   1068     if (descriptor->is_map()) {
   1069       ScopedPyObjectPtr map(GetAttr(self, name));
   1070       const FieldDescriptor* value_descriptor =
   1071           descriptor->message_type()->FindFieldByName("value");
   1072       if (value_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   1073         Py_ssize_t map_pos = 0;
   1074         PyObject* map_key;
   1075         PyObject* map_value;
   1076         while (PyDict_Next(value, &map_pos, &map_key, &map_value)) {
   1077           ScopedPyObjectPtr function_return;
   1078           function_return.reset(PyObject_GetItem(map.get(), map_key));
   1079           if (function_return.get() == NULL) {
   1080             return -1;
   1081           }
   1082           ScopedPyObjectPtr ok(PyObject_CallMethod(
   1083               function_return.get(), "MergeFrom", "O", map_value));
   1084           if (ok.get() == NULL) {
   1085             return -1;
   1086           }
   1087         }
   1088       } else {
   1089         ScopedPyObjectPtr function_return;
   1090         function_return.reset(
   1091             PyObject_CallMethod(map.get(), "update", "O", value));
   1092         if (function_return.get() == NULL) {
   1093           return -1;
   1094         }
   1095       }
   1096     } else if (descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
   1097       ScopedPyObjectPtr container(GetAttr(self, name));
   1098       if (container == NULL) {
   1099         return -1;
   1100       }
   1101       if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   1102         RepeatedCompositeContainer* rc_container =
   1103             reinterpret_cast<RepeatedCompositeContainer*>(container.get());
   1104         ScopedPyObjectPtr iter(PyObject_GetIter(value));
   1105         if (iter == NULL) {
   1106           PyErr_SetString(PyExc_TypeError, "Value must be iterable");
   1107           return -1;
   1108         }
   1109         ScopedPyObjectPtr next;
   1110         while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
   1111           PyObject* kwargs = (PyDict_Check(next.get()) ? next.get() : NULL);
   1112           ScopedPyObjectPtr new_msg(
   1113               repeated_composite_container::Add(rc_container, NULL, kwargs));
   1114           if (new_msg == NULL) {
   1115             return -1;
   1116           }
   1117           if (kwargs == NULL) {
   1118             // next was not a dict, it's a message we need to merge
   1119             ScopedPyObjectPtr merged(MergeFrom(
   1120                 reinterpret_cast<CMessage*>(new_msg.get()), next.get()));
   1121             if (merged.get() == NULL) {
   1122               return -1;
   1123             }
   1124           }
   1125         }
   1126         if (PyErr_Occurred()) {
   1127           // Check to see how PyIter_Next() exited.
   1128           return -1;
   1129         }
   1130       } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
   1131         RepeatedScalarContainer* rs_container =
   1132             reinterpret_cast<RepeatedScalarContainer*>(container.get());
   1133         ScopedPyObjectPtr iter(PyObject_GetIter(value));
   1134         if (iter == NULL) {
   1135           PyErr_SetString(PyExc_TypeError, "Value must be iterable");
   1136           return -1;
   1137         }
   1138         ScopedPyObjectPtr next;
   1139         while ((next.reset(PyIter_Next(iter.get()))) != NULL) {
   1140           ScopedPyObjectPtr enum_value(
   1141               GetIntegerEnumValue(*descriptor, next.get()));
   1142           if (enum_value == NULL) {
   1143             return -1;
   1144           }
   1145           ScopedPyObjectPtr new_msg(repeated_scalar_container::Append(
   1146               rs_container, enum_value.get()));
   1147           if (new_msg == NULL) {
   1148             return -1;
   1149           }
   1150         }
   1151         if (PyErr_Occurred()) {
   1152           // Check to see how PyIter_Next() exited.
   1153           return -1;
   1154         }
   1155       } else {
   1156         if (ScopedPyObjectPtr(repeated_scalar_container::Extend(
   1157                 reinterpret_cast<RepeatedScalarContainer*>(container.get()),
   1158                 value)) ==
   1159             NULL) {
   1160           return -1;
   1161         }
   1162       }
   1163     } else if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   1164       ScopedPyObjectPtr message(GetAttr(self, name));
   1165       if (message == NULL) {
   1166         return -1;
   1167       }
   1168       CMessage* cmessage = reinterpret_cast<CMessage*>(message.get());
   1169       if (PyDict_Check(value)) {
   1170         if (InitAttributes(cmessage, value) < 0) {
   1171           return -1;
   1172         }
   1173       } else {
   1174         ScopedPyObjectPtr merged(MergeFrom(cmessage, value));
   1175         if (merged == NULL) {
   1176           return -1;
   1177         }
   1178       }
   1179     } else {
   1180       ScopedPyObjectPtr new_val;
   1181       if (descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
   1182         new_val.reset(GetIntegerEnumValue(*descriptor, value));
   1183         if (new_val == NULL) {
   1184           return -1;
   1185         }
   1186       }
   1187       if (SetAttr(self, name, (new_val.get() == NULL) ? value : new_val.get()) <
   1188           0) {
   1189         return -1;
   1190       }
   1191     }
   1192   }
   1193   return 0;
   1194 }
   1195 
   1196 // Allocates an incomplete Python Message: the caller must fill self->message,
   1197 // self->owner and eventually self->parent.
   1198 CMessage* NewEmptyMessage(CMessageClass* type) {
   1199   CMessage* self = reinterpret_cast<CMessage*>(
   1200       PyType_GenericAlloc(&type->super.ht_type, 0));
   1201   if (self == NULL) {
   1202     return NULL;
   1203   }
   1204 
   1205   self->message = NULL;
   1206   self->parent = NULL;
   1207   self->parent_field_descriptor = NULL;
   1208   self->read_only = false;
   1209   self->extensions = NULL;
   1210 
   1211   self->composite_fields = NULL;
   1212 
   1213   return self;
   1214 }
   1215 
   1216 // The __new__ method of Message classes.
   1217 // Creates a new C++ message and takes ownership.
   1218 static PyObject* New(PyTypeObject* cls,
   1219                      PyObject* unused_args, PyObject* unused_kwargs) {
   1220   CMessageClass* type = CheckMessageClass(cls);
   1221   if (type == NULL) {
   1222     return NULL;
   1223   }
   1224   // Retrieve the message descriptor and the default instance (=prototype).
   1225   const Descriptor* message_descriptor = type->message_descriptor;
   1226   if (message_descriptor == NULL) {
   1227     return NULL;
   1228   }
   1229   const Message* default_message = type->py_descriptor_pool->message_factory
   1230                                    ->GetPrototype(message_descriptor);
   1231   if (default_message == NULL) {
   1232     PyErr_SetString(PyExc_TypeError, message_descriptor->full_name().c_str());
   1233     return NULL;
   1234   }
   1235 
   1236   CMessage* self = NewEmptyMessage(type);
   1237   if (self == NULL) {
   1238     return NULL;
   1239   }
   1240   self->message = default_message->New();
   1241   self->owner.reset(self->message);
   1242   return reinterpret_cast<PyObject*>(self);
   1243 }
   1244 
   1245 // The __init__ method of Message classes.
   1246 // It initializes fields from keywords passed to the constructor.
   1247 static int Init(CMessage* self, PyObject* args, PyObject* kwargs) {
   1248   if (PyTuple_Size(args) != 0) {
   1249     PyErr_SetString(PyExc_TypeError, "No positional arguments allowed");
   1250     return -1;
   1251   }
   1252 
   1253   return InitAttributes(self, kwargs);
   1254 }
   1255 
   1256 // ---------------------------------------------------------------------
   1257 // Deallocating a CMessage
   1258 //
   1259 // Deallocating a CMessage requires that we clear any weak references
   1260 // from children to the message being deallocated.
   1261 
   1262 // Clear the weak reference from the child to the parent.
   1263 struct ClearWeakReferences : public ChildVisitor {
   1264   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
   1265     container->parent = NULL;
   1266     // The elements in the container have the same parent as the
   1267     // container itself, so NULL out that pointer as well.
   1268     const Py_ssize_t n = PyList_GET_SIZE(container->child_messages);
   1269     for (Py_ssize_t i = 0; i < n; ++i) {
   1270       CMessage* child_cmessage = reinterpret_cast<CMessage*>(
   1271           PyList_GET_ITEM(container->child_messages, i));
   1272       child_cmessage->parent = NULL;
   1273     }
   1274     return 0;
   1275   }
   1276 
   1277   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
   1278     container->parent = NULL;
   1279     return 0;
   1280   }
   1281 
   1282   int VisitMapContainer(MapContainer* container) {
   1283     container->parent = NULL;
   1284     return 0;
   1285   }
   1286 
   1287   int VisitCMessage(CMessage* cmessage,
   1288                     const FieldDescriptor* field_descriptor) {
   1289     cmessage->parent = NULL;
   1290     return 0;
   1291   }
   1292 };
   1293 
   1294 static void Dealloc(CMessage* self) {
   1295   // Null out all weak references from children to this message.
   1296   GOOGLE_CHECK_EQ(0, ForEachCompositeField(self, ClearWeakReferences()));
   1297   if (self->extensions) {
   1298     self->extensions->parent = NULL;
   1299   }
   1300 
   1301   Py_CLEAR(self->extensions);
   1302   Py_CLEAR(self->composite_fields);
   1303   self->owner.reset();
   1304   Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
   1305 }
   1306 
   1307 // ---------------------------------------------------------------------
   1308 
   1309 
   1310 PyObject* IsInitialized(CMessage* self, PyObject* args) {
   1311   PyObject* errors = NULL;
   1312   if (PyArg_ParseTuple(args, "|O", &errors) < 0) {
   1313     return NULL;
   1314   }
   1315   if (self->message->IsInitialized()) {
   1316     Py_RETURN_TRUE;
   1317   }
   1318   if (errors != NULL) {
   1319     ScopedPyObjectPtr initialization_errors(
   1320         FindInitializationErrors(self));
   1321     if (initialization_errors == NULL) {
   1322       return NULL;
   1323     }
   1324     ScopedPyObjectPtr extend_name(PyString_FromString("extend"));
   1325     if (extend_name == NULL) {
   1326       return NULL;
   1327     }
   1328     ScopedPyObjectPtr result(PyObject_CallMethodObjArgs(
   1329         errors,
   1330         extend_name.get(),
   1331         initialization_errors.get(),
   1332         NULL));
   1333     if (result == NULL) {
   1334       return NULL;
   1335     }
   1336   }
   1337   Py_RETURN_FALSE;
   1338 }
   1339 
   1340 PyObject* HasFieldByDescriptor(
   1341     CMessage* self, const FieldDescriptor* field_descriptor) {
   1342   Message* message = self->message;
   1343   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
   1344     return NULL;
   1345   }
   1346   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
   1347     PyErr_SetString(PyExc_KeyError,
   1348                     "Field is repeated. A singular method is required.");
   1349     return NULL;
   1350   }
   1351   bool has_field =
   1352       message->GetReflection()->HasField(*message, field_descriptor);
   1353   return PyBool_FromLong(has_field ? 1 : 0);
   1354 }
   1355 
   1356 const FieldDescriptor* FindFieldWithOneofs(
   1357     const Message* message, const string& field_name, bool* in_oneof) {
   1358   *in_oneof = false;
   1359   const Descriptor* descriptor = message->GetDescriptor();
   1360   const FieldDescriptor* field_descriptor =
   1361       descriptor->FindFieldByName(field_name);
   1362   if (field_descriptor != NULL) {
   1363     return field_descriptor;
   1364   }
   1365   const OneofDescriptor* oneof_desc =
   1366       descriptor->FindOneofByName(field_name);
   1367   if (oneof_desc != NULL) {
   1368     *in_oneof = true;
   1369     return message->GetReflection()->GetOneofFieldDescriptor(*message,
   1370                                                              oneof_desc);
   1371   }
   1372   return NULL;
   1373 }
   1374 
   1375 bool CheckHasPresence(const FieldDescriptor* field_descriptor, bool in_oneof) {
   1376   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
   1377     PyErr_Format(PyExc_ValueError,
   1378                  "Protocol message has no singular \"%s\" field.",
   1379                  field_descriptor->name().c_str());
   1380     return false;
   1381   }
   1382 
   1383   if (field_descriptor->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
   1384     // HasField() for a oneof *itself* isn't supported.
   1385     if (in_oneof) {
   1386       PyErr_Format(PyExc_ValueError,
   1387                    "Can't test oneof field \"%s\" for presence in proto3, use "
   1388                    "WhichOneof instead.",
   1389                    field_descriptor->containing_oneof()->name().c_str());
   1390       return false;
   1391     }
   1392 
   1393     // ...but HasField() for fields *in* a oneof is supported.
   1394     if (field_descriptor->containing_oneof() != NULL) {
   1395       return true;
   1396     }
   1397 
   1398     if (field_descriptor->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
   1399       PyErr_Format(
   1400           PyExc_ValueError,
   1401           "Can't test non-submessage field \"%s\" for presence in proto3.",
   1402           field_descriptor->name().c_str());
   1403       return false;
   1404     }
   1405   }
   1406 
   1407   return true;
   1408 }
   1409 
   1410 PyObject* HasField(CMessage* self, PyObject* arg) {
   1411   char* field_name;
   1412   Py_ssize_t size;
   1413 #if PY_MAJOR_VERSION < 3
   1414   if (PyString_AsStringAndSize(arg, &field_name, &size) < 0) {
   1415     return NULL;
   1416   }
   1417 #else
   1418   field_name = PyUnicode_AsUTF8AndSize(arg, &size);
   1419   if (!field_name) {
   1420     return NULL;
   1421   }
   1422 #endif
   1423 
   1424   Message* message = self->message;
   1425   bool is_in_oneof;
   1426   const FieldDescriptor* field_descriptor =
   1427       FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
   1428   if (field_descriptor == NULL) {
   1429     if (!is_in_oneof) {
   1430       PyErr_Format(PyExc_ValueError, "Unknown field %s.", field_name);
   1431       return NULL;
   1432     } else {
   1433       Py_RETURN_FALSE;
   1434     }
   1435   }
   1436 
   1437   if (!CheckHasPresence(field_descriptor, is_in_oneof)) {
   1438     return NULL;
   1439   }
   1440 
   1441   if (message->GetReflection()->HasField(*message, field_descriptor)) {
   1442     Py_RETURN_TRUE;
   1443   }
   1444   if (!message->GetReflection()->SupportsUnknownEnumValues() &&
   1445       field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM) {
   1446     // Special case: Python HasField() differs in semantics from C++
   1447     // slightly: we return HasField('enum_field') == true if there is
   1448     // an unknown enum value present. To implement this we have to
   1449     // look in the UnknownFieldSet.
   1450     const UnknownFieldSet& unknown_field_set =
   1451         message->GetReflection()->GetUnknownFields(*message);
   1452     for (int i = 0; i < unknown_field_set.field_count(); ++i) {
   1453       if (unknown_field_set.field(i).number() == field_descriptor->number()) {
   1454         Py_RETURN_TRUE;
   1455       }
   1456     }
   1457   }
   1458   Py_RETURN_FALSE;
   1459 }
   1460 
   1461 PyObject* ClearExtension(CMessage* self, PyObject* extension) {
   1462   if (self->extensions != NULL) {
   1463     return extension_dict::ClearExtension(self->extensions, extension);
   1464   } else {
   1465     const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
   1466     if (descriptor == NULL) {
   1467       return NULL;
   1468     }
   1469     if (ScopedPyObjectPtr(ClearFieldByDescriptor(self, descriptor)) == NULL) {
   1470       return NULL;
   1471     }
   1472   }
   1473   Py_RETURN_NONE;
   1474 }
   1475 
   1476 PyObject* HasExtension(CMessage* self, PyObject* extension) {
   1477   const FieldDescriptor* descriptor = GetExtensionDescriptor(extension);
   1478   if (descriptor == NULL) {
   1479     return NULL;
   1480   }
   1481   return HasFieldByDescriptor(self, descriptor);
   1482 }
   1483 
   1484 // ---------------------------------------------------------------------
   1485 // Releasing messages
   1486 //
// The Python API's ClearField() and Clear() methods behave
// differently from their C++ counterparts.  While the C++ versions
// clear the children, the Python versions detach the children
// without touching their content.  This impedance mismatch causes
// some complexity in the implementation, which is captured in this
// section.
   1493 //
   1494 // When a CMessage field is cleared we need to:
   1495 //
   1496 // * Release the Message used as the backing store for the CMessage
   1497 //   from its parent.
   1498 //
   1499 // * Change the owner field of the released CMessage and all of its
   1500 //   children to point to the newly released Message.
   1501 //
   1502 // * Clear the weak references from the released CMessage to the
   1503 //   parent.
   1504 //
   1505 // When a RepeatedCompositeContainer field is cleared we need to:
   1506 //
   1507 // * Release all the Message used as the backing store for the
   1508 //   CMessages stored in the container.
   1509 //
   1510 // * Change the owner field of all the released CMessage and all of
   1511 //   their children to point to the newly released Messages.
   1512 //
   1513 // * Clear the weak references from the released container to the
   1514 //   parent.
   1515 
   1516 struct SetOwnerVisitor : public ChildVisitor {
   1517   // new_owner must outlive this object.
   1518   explicit SetOwnerVisitor(const shared_ptr<Message>& new_owner)
   1519       : new_owner_(new_owner) {}
   1520 
   1521   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
   1522     repeated_composite_container::SetOwner(container, new_owner_);
   1523     return 0;
   1524   }
   1525 
   1526   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
   1527     repeated_scalar_container::SetOwner(container, new_owner_);
   1528     return 0;
   1529   }
   1530 
   1531   int VisitMapContainer(MapContainer* container) {
   1532     container->SetOwner(new_owner_);
   1533     return 0;
   1534   }
   1535 
   1536   int VisitCMessage(CMessage* cmessage,
   1537                     const FieldDescriptor* field_descriptor) {
   1538     return SetOwner(cmessage, new_owner_);
   1539   }
   1540 
   1541  private:
   1542   const shared_ptr<Message>& new_owner_;
   1543 };
   1544 
   1545 // Change the owner of this CMessage and all its children, recursively.
   1546 int SetOwner(CMessage* self, const shared_ptr<Message>& new_owner) {
   1547   self->owner = new_owner;
   1548   if (ForEachCompositeField(self, SetOwnerVisitor(new_owner)) == -1)
   1549     return -1;
   1550   return 0;
   1551 }
   1552 
   1553 // Releases the message specified by 'field' and returns the
   1554 // pointer. If the field does not exist a new message is created using
   1555 // 'descriptor'. The caller takes ownership of the returned pointer.
   1556 Message* ReleaseMessage(CMessage* self,
   1557                         const Descriptor* descriptor,
   1558                         const FieldDescriptor* field_descriptor) {
   1559   MessageFactory* message_factory = GetFactoryForMessage(self);
   1560   Message* released_message = self->message->GetReflection()->ReleaseMessage(
   1561       self->message, field_descriptor, message_factory);
   1562   // ReleaseMessage will return NULL which differs from
   1563   // child_cmessage->message, if the field does not exist.  In this case,
   1564   // the latter points to the default instance via a const_cast<>, so we
   1565   // have to reset it to a new mutable object since we are taking ownership.
   1566   if (released_message == NULL) {
   1567     const Message* prototype = message_factory->GetPrototype(descriptor);
   1568     GOOGLE_DCHECK(prototype != NULL);
   1569     released_message = prototype->New();
   1570   }
   1571 
   1572   return released_message;
   1573 }
   1574 
   1575 int ReleaseSubMessage(CMessage* self,
   1576                       const FieldDescriptor* field_descriptor,
   1577                       CMessage* child_cmessage) {
   1578   // Release the Message
   1579   shared_ptr<Message> released_message(ReleaseMessage(
   1580       self, child_cmessage->message->GetDescriptor(), field_descriptor));
   1581   child_cmessage->message = released_message.get();
   1582   child_cmessage->owner.swap(released_message);
   1583   child_cmessage->parent = NULL;
   1584   child_cmessage->parent_field_descriptor = NULL;
   1585   child_cmessage->read_only = false;
   1586   return ForEachCompositeField(child_cmessage,
   1587                                SetOwnerVisitor(child_cmessage->owner));
   1588 }
   1589 
   1590 struct ReleaseChild : public ChildVisitor {
   1591   // message must outlive this object.
   1592   explicit ReleaseChild(CMessage* parent) :
   1593       parent_(parent) {}
   1594 
   1595   int VisitRepeatedCompositeContainer(RepeatedCompositeContainer* container) {
   1596     return repeated_composite_container::Release(
   1597         reinterpret_cast<RepeatedCompositeContainer*>(container));
   1598   }
   1599 
   1600   int VisitRepeatedScalarContainer(RepeatedScalarContainer* container) {
   1601     return repeated_scalar_container::Release(
   1602         reinterpret_cast<RepeatedScalarContainer*>(container));
   1603   }
   1604 
   1605   int VisitMapContainer(MapContainer* container) {
   1606     return reinterpret_cast<MapContainer*>(container)->Release();
   1607   }
   1608 
   1609   int VisitCMessage(CMessage* cmessage,
   1610                     const FieldDescriptor* field_descriptor) {
   1611     return ReleaseSubMessage(parent_, field_descriptor,
   1612         reinterpret_cast<CMessage*>(cmessage));
   1613   }
   1614 
   1615   CMessage* parent_;
   1616 };
   1617 
   1618 int InternalReleaseFieldByDescriptor(
   1619     CMessage* self,
   1620     const FieldDescriptor* field_descriptor,
   1621     PyObject* composite_field) {
   1622   return VisitCompositeField(
   1623       field_descriptor,
   1624       composite_field,
   1625       ReleaseChild(self));
   1626 }
   1627 
   1628 PyObject* ClearFieldByDescriptor(
   1629     CMessage* self,
   1630     const FieldDescriptor* descriptor) {
   1631   if (!CheckFieldBelongsToMessage(descriptor, self->message)) {
   1632     return NULL;
   1633   }
   1634   AssureWritable(self);
   1635   self->message->GetReflection()->ClearField(self->message, descriptor);
   1636   Py_RETURN_NONE;
   1637 }
   1638 
   1639 PyObject* ClearField(CMessage* self, PyObject* arg) {
   1640   if (!PyString_Check(arg)) {
   1641     PyErr_SetString(PyExc_TypeError, "field name must be a string");
   1642     return NULL;
   1643   }
   1644 #if PY_MAJOR_VERSION < 3
   1645   const char* field_name = PyString_AS_STRING(arg);
   1646   Py_ssize_t size = PyString_GET_SIZE(arg);
   1647 #else
   1648   Py_ssize_t size;
   1649   const char* field_name = PyUnicode_AsUTF8AndSize(arg, &size);
   1650 #endif
   1651   AssureWritable(self);
   1652   Message* message = self->message;
   1653   ScopedPyObjectPtr arg_in_oneof;
   1654   bool is_in_oneof;
   1655   const FieldDescriptor* field_descriptor =
   1656       FindFieldWithOneofs(message, string(field_name, size), &is_in_oneof);
   1657   if (field_descriptor == NULL) {
   1658     if (!is_in_oneof) {
   1659       PyErr_Format(PyExc_ValueError,
   1660                    "Protocol message has no \"%s\" field.", field_name);
   1661       return NULL;
   1662     } else {
   1663       Py_RETURN_NONE;
   1664     }
   1665   } else if (is_in_oneof) {
   1666     const string& name = field_descriptor->name();
   1667     arg_in_oneof.reset(PyString_FromStringAndSize(name.c_str(), name.size()));
   1668     arg = arg_in_oneof.get();
   1669   }
   1670 
   1671   PyObject* composite_field = self->composite_fields ?
   1672       PyDict_GetItem(self->composite_fields, arg) : NULL;
   1673 
   1674   // Only release the field if there's a possibility that there are
   1675   // references to it.
   1676   if (composite_field != NULL) {
   1677     if (InternalReleaseFieldByDescriptor(self, field_descriptor,
   1678                                          composite_field) < 0) {
   1679       return NULL;
   1680     }
   1681     PyDict_DelItem(self->composite_fields, arg);
   1682   }
   1683   message->GetReflection()->ClearField(message, field_descriptor);
   1684   if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_ENUM &&
   1685       !message->GetReflection()->SupportsUnknownEnumValues()) {
   1686     UnknownFieldSet* unknown_field_set =
   1687         message->GetReflection()->MutableUnknownFields(message);
   1688     unknown_field_set->DeleteByNumber(field_descriptor->number());
   1689   }
   1690 
   1691   Py_RETURN_NONE;
   1692 }
   1693 
   1694 PyObject* Clear(CMessage* self) {
   1695   AssureWritable(self);
   1696   if (ForEachCompositeField(self, ReleaseChild(self)) == -1)
   1697     return NULL;
   1698   Py_CLEAR(self->extensions);
   1699   if (self->composite_fields) {
   1700     PyDict_Clear(self->composite_fields);
   1701   }
   1702   self->message->Clear();
   1703   Py_RETURN_NONE;
   1704 }
   1705 
   1706 // ---------------------------------------------------------------------
   1707 
   1708 static string GetMessageName(CMessage* self) {
   1709   if (self->parent_field_descriptor != NULL) {
   1710     return self->parent_field_descriptor->full_name();
   1711   } else {
   1712     return self->message->GetDescriptor()->full_name();
   1713   }
   1714 }
   1715 
   1716 static PyObject* SerializeToString(CMessage* self, PyObject* args) {
   1717   if (!self->message->IsInitialized()) {
   1718     ScopedPyObjectPtr errors(FindInitializationErrors(self));
   1719     if (errors == NULL) {
   1720       return NULL;
   1721     }
   1722     ScopedPyObjectPtr comma(PyString_FromString(","));
   1723     if (comma == NULL) {
   1724       return NULL;
   1725     }
   1726     ScopedPyObjectPtr joined(
   1727         PyObject_CallMethod(comma.get(), "join", "O", errors.get()));
   1728     if (joined == NULL) {
   1729       return NULL;
   1730     }
   1731 
   1732     // TODO(haberman): this is a (hopefully temporary) hack.  The unit testing
   1733     // infrastructure reloads all pure-Python modules for every test, but not
   1734     // C++ modules (because that's generally impossible:
   1735     // http://bugs.python.org/issue1144263).  But if we cache EncodeError, we'll
   1736     // return the EncodeError from a previous load of the module, which won't
   1737     // match a user's attempt to catch EncodeError.  So we have to look it up
   1738     // again every time.
   1739     ScopedPyObjectPtr message_module(PyImport_ImportModule(
   1740         "google.protobuf.message"));
   1741     if (message_module.get() == NULL) {
   1742       return NULL;
   1743     }
   1744 
   1745     ScopedPyObjectPtr encode_error(
   1746         PyObject_GetAttrString(message_module.get(), "EncodeError"));
   1747     if (encode_error.get() == NULL) {
   1748       return NULL;
   1749     }
   1750     PyErr_Format(encode_error.get(),
   1751                  "Message %s is missing required fields: %s",
   1752                  GetMessageName(self).c_str(), PyString_AsString(joined.get()));
   1753     return NULL;
   1754   }
   1755   int size = self->message->ByteSize();
   1756   if (size <= 0) {
   1757     return PyBytes_FromString("");
   1758   }
   1759   PyObject* result = PyBytes_FromStringAndSize(NULL, size);
   1760   if (result == NULL) {
   1761     return NULL;
   1762   }
   1763   char* buffer = PyBytes_AS_STRING(result);
   1764   self->message->SerializeWithCachedSizesToArray(
   1765       reinterpret_cast<uint8*>(buffer));
   1766   return result;
   1767 }
   1768 
   1769 static PyObject* SerializePartialToString(CMessage* self) {
   1770   string contents;
   1771   self->message->SerializePartialToString(&contents);
   1772   return PyBytes_FromStringAndSize(contents.c_str(), contents.size());
   1773 }
   1774 
   1775 // Formats proto fields for ascii dumps using python formatting functions where
   1776 // appropriate.
   1777 class PythonFieldValuePrinter : public TextFormat::FieldValuePrinter {
   1778  public:
   1779   // Python has some differences from C++ when printing floating point numbers.
   1780   //
   1781   // 1) Trailing .0 is always printed.
   1782   // 2) (Python2) Output is rounded to 12 digits.
   1783   // 3) (Python3) The full precision of the double is preserved (and Python uses
   1784   //    David M. Gay's dtoa(), when the C++ code uses SimpleDtoa. There are some
   1785   //    differences, but they rarely happen)
   1786   //
   1787   // We override floating point printing with the C-API function for printing
   1788   // Python floats to ensure consistency.
   1789   string PrintFloat(float value) const { return PrintDouble(value); }
   1790   string PrintDouble(double value) const {
   1791     // This implementation is not highly optimized (it allocates two temporary
   1792     // Python objects) but it is simple and portable.  If this is shown to be a
   1793     // performance bottleneck, we can optimize it, but the results will likely
   1794     // be more complicated to accommodate the differing behavior of double
   1795     // formatting between Python 2 and Python 3.
   1796     //
   1797     // (Though a valid question is: do we really want to make out output
   1798     // dependent on the Python version?)
   1799     ScopedPyObjectPtr py_value(PyFloat_FromDouble(value));
   1800     if (!py_value.get()) {
   1801       return string();
   1802     }
   1803 
   1804     ScopedPyObjectPtr py_str(PyObject_Str(py_value.get()));
   1805     if (!py_str.get()) {
   1806       return string();
   1807     }
   1808 
   1809     return string(PyString_AsString(py_str.get()));
   1810   }
   1811 };
   1812 
   1813 static PyObject* ToStr(CMessage* self) {
   1814   TextFormat::Printer printer;
   1815   // Passes ownership
   1816   printer.SetDefaultFieldValuePrinter(new PythonFieldValuePrinter());
   1817   printer.SetHideUnknownFields(true);
   1818   string output;
   1819   if (!printer.PrintToString(*self->message, &output)) {
   1820     PyErr_SetString(PyExc_ValueError, "Unable to convert message to str");
   1821     return NULL;
   1822   }
   1823   return PyString_FromString(output.c_str());
   1824 }
   1825 
   1826 PyObject* MergeFrom(CMessage* self, PyObject* arg) {
   1827   CMessage* other_message;
   1828   if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
   1829     PyErr_Format(PyExc_TypeError,
   1830                  "Parameter to MergeFrom() must be instance of same class: "
   1831                  "expected %s got %s.",
   1832                  self->message->GetDescriptor()->full_name().c_str(),
   1833                  Py_TYPE(arg)->tp_name);
   1834     return NULL;
   1835   }
   1836 
   1837   other_message = reinterpret_cast<CMessage*>(arg);
   1838   if (other_message->message->GetDescriptor() !=
   1839       self->message->GetDescriptor()) {
   1840     PyErr_Format(PyExc_TypeError,
   1841                  "Parameter to MergeFrom() must be instance of same class: "
   1842                  "expected %s got %s.",
   1843                  self->message->GetDescriptor()->full_name().c_str(),
   1844                  other_message->message->GetDescriptor()->full_name().c_str());
   1845     return NULL;
   1846   }
   1847   AssureWritable(self);
   1848 
   1849   // TODO(tibell): Message::MergeFrom might turn some child Messages
   1850   // into mutable messages, invalidating the message field in the
   1851   // corresponding CMessages.  We should run a FixupMessageReferences
   1852   // pass here.
   1853 
   1854   self->message->MergeFrom(*other_message->message);
   1855   Py_RETURN_NONE;
   1856 }
   1857 
   1858 static PyObject* CopyFrom(CMessage* self, PyObject* arg) {
   1859   CMessage* other_message;
   1860   if (!PyObject_TypeCheck(arg, &CMessage_Type)) {
   1861     PyErr_Format(PyExc_TypeError,
   1862                  "Parameter to CopyFrom() must be instance of same class: "
   1863                  "expected %s got %s.",
   1864                  self->message->GetDescriptor()->full_name().c_str(),
   1865                  Py_TYPE(arg)->tp_name);
   1866     return NULL;
   1867   }
   1868 
   1869   other_message = reinterpret_cast<CMessage*>(arg);
   1870 
   1871   if (self == other_message) {
   1872     Py_RETURN_NONE;
   1873   }
   1874 
   1875   if (other_message->message->GetDescriptor() !=
   1876       self->message->GetDescriptor()) {
   1877     PyErr_Format(PyExc_TypeError,
   1878                  "Parameter to CopyFrom() must be instance of same class: "
   1879                  "expected %s got %s.",
   1880                  self->message->GetDescriptor()->full_name().c_str(),
   1881                  other_message->message->GetDescriptor()->full_name().c_str());
   1882     return NULL;
   1883   }
   1884 
   1885   AssureWritable(self);
   1886 
   1887   // CopyFrom on the message will not clean up self->composite_fields,
   1888   // which can leave us in an inconsistent state, so clear it out here.
   1889   (void)ScopedPyObjectPtr(Clear(self));
   1890 
   1891   self->message->CopyFrom(*other_message->message);
   1892 
   1893   Py_RETURN_NONE;
   1894 }
   1895 
   1896 // Protobuf has a 64MB limit built in, this variable will override this. Please
   1897 // do not enable this unless you fully understand the implications: protobufs
   1898 // must all be kept in memory at the same time, so if they grow too big you may
   1899 // get OOM errors. The protobuf APIs do not provide any tools for processing
   1900 // protobufs in chunks.  If you have protos this big you should break them up if
   1901 // it is at all convenient to do so.
   1902 static bool allow_oversize_protos = false;
   1903 
   1904 // Provide a method in the module to set allow_oversize_protos to a boolean
   1905 // value. This method returns the newly value of allow_oversize_protos.
   1906 static PyObject* SetAllowOversizeProtos(PyObject* m, PyObject* arg) {
   1907   if (!arg || !PyBool_Check(arg)) {
   1908     PyErr_SetString(PyExc_TypeError,
   1909                     "Argument to SetAllowOversizeProtos must be boolean");
   1910     return NULL;
   1911   }
   1912   allow_oversize_protos = PyObject_IsTrue(arg);
   1913   if (allow_oversize_protos) {
   1914     Py_RETURN_TRUE;
   1915   } else {
   1916     Py_RETURN_FALSE;
   1917   }
   1918 }
   1919 
   1920 static PyObject* MergeFromString(CMessage* self, PyObject* arg) {
   1921   const void* data;
   1922   Py_ssize_t data_length;
   1923   if (PyObject_AsReadBuffer(arg, &data, &data_length) < 0) {
   1924     return NULL;
   1925   }
   1926 
   1927   AssureWritable(self);
   1928   io::CodedInputStream input(
   1929       reinterpret_cast<const uint8*>(data), data_length);
   1930   if (allow_oversize_protos) {
   1931     input.SetTotalBytesLimit(INT_MAX, INT_MAX);
   1932   }
   1933   PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
   1934   input.SetExtensionRegistry(pool->pool, pool->message_factory);
   1935   bool success = self->message->MergePartialFromCodedStream(&input);
   1936   if (success) {
   1937     return PyInt_FromLong(input.CurrentPosition());
   1938   } else {
   1939     PyErr_Format(DecodeError_class, "Error parsing message");
   1940     return NULL;
   1941   }
   1942 }
   1943 
   1944 static PyObject* ParseFromString(CMessage* self, PyObject* arg) {
   1945   if (ScopedPyObjectPtr(Clear(self)) == NULL) {
   1946     return NULL;
   1947   }
   1948   return MergeFromString(self, arg);
   1949 }
   1950 
   1951 static PyObject* ByteSize(CMessage* self, PyObject* args) {
   1952   return PyLong_FromLong(self->message->ByteSize());
   1953 }
   1954 
   1955 static PyObject* RegisterExtension(PyObject* cls,
   1956                                    PyObject* extension_handle) {
   1957   const FieldDescriptor* descriptor =
   1958       GetExtensionDescriptor(extension_handle);
   1959   if (descriptor == NULL) {
   1960     return NULL;
   1961   }
   1962 
   1963   ScopedPyObjectPtr extensions_by_name(
   1964       PyObject_GetAttr(cls, k_extensions_by_name));
   1965   if (extensions_by_name == NULL) {
   1966     PyErr_SetString(PyExc_TypeError, "no extensions_by_name on class");
   1967     return NULL;
   1968   }
   1969   ScopedPyObjectPtr full_name(PyObject_GetAttr(extension_handle, kfull_name));
   1970   if (full_name == NULL) {
   1971     return NULL;
   1972   }
   1973 
   1974   // If the extension was already registered, check that it is the same.
   1975   PyObject* existing_extension =
   1976       PyDict_GetItem(extensions_by_name.get(), full_name.get());
   1977   if (existing_extension != NULL) {
   1978     const FieldDescriptor* existing_extension_descriptor =
   1979         GetExtensionDescriptor(existing_extension);
   1980     if (existing_extension_descriptor != descriptor) {
   1981       PyErr_SetString(PyExc_ValueError, "Double registration of Extensions");
   1982       return NULL;
   1983     }
   1984     // Nothing else to do.
   1985     Py_RETURN_NONE;
   1986   }
   1987 
   1988   if (PyDict_SetItem(extensions_by_name.get(), full_name.get(),
   1989                      extension_handle) < 0) {
   1990     return NULL;
   1991   }
   1992 
   1993   // Also store a mapping from extension number to implementing class.
   1994   ScopedPyObjectPtr extensions_by_number(
   1995       PyObject_GetAttr(cls, k_extensions_by_number));
   1996   if (extensions_by_number == NULL) {
   1997     PyErr_SetString(PyExc_TypeError, "no extensions_by_number on class");
   1998     return NULL;
   1999   }
   2000 
   2001   ScopedPyObjectPtr number(PyObject_GetAttrString(extension_handle, "number"));
   2002   if (number == NULL) {
   2003     return NULL;
   2004   }
   2005 
   2006   // If the extension was already registered by number, check that it is the
   2007   // same.
   2008   existing_extension = PyDict_GetItem(extensions_by_number.get(), number.get());
   2009   if (existing_extension != NULL) {
   2010     const FieldDescriptor* existing_extension_descriptor =
   2011         GetExtensionDescriptor(existing_extension);
   2012     if (existing_extension_descriptor != descriptor) {
   2013       const Descriptor* msg_desc = GetMessageDescriptor(
   2014           reinterpret_cast<PyTypeObject*>(cls));
   2015       PyErr_Format(
   2016           PyExc_ValueError,
   2017           "Extensions \"%s\" and \"%s\" both try to extend message type "
   2018           "\"%s\" with field number %ld.",
   2019           existing_extension_descriptor->full_name().c_str(),
   2020           descriptor->full_name().c_str(),
   2021           msg_desc->full_name().c_str(),
   2022           PyInt_AsLong(number.get()));
   2023       return NULL;
   2024     }
   2025     // Nothing else to do.
   2026     Py_RETURN_NONE;
   2027   }
   2028   if (PyDict_SetItem(extensions_by_number.get(), number.get(),
   2029                      extension_handle) < 0) {
   2030     return NULL;
   2031   }
   2032 
   2033   // Check if it's a message set
   2034   if (descriptor->is_extension() &&
   2035       descriptor->containing_type()->options().message_set_wire_format() &&
   2036       descriptor->type() == FieldDescriptor::TYPE_MESSAGE &&
   2037       descriptor->label() == FieldDescriptor::LABEL_OPTIONAL) {
   2038     ScopedPyObjectPtr message_name(PyString_FromStringAndSize(
   2039         descriptor->message_type()->full_name().c_str(),
   2040         descriptor->message_type()->full_name().size()));
   2041     if (message_name == NULL) {
   2042       return NULL;
   2043     }
   2044     PyDict_SetItem(extensions_by_name.get(), message_name.get(),
   2045                    extension_handle);
   2046   }
   2047 
   2048   Py_RETURN_NONE;
   2049 }
   2050 
   2051 static PyObject* SetInParent(CMessage* self, PyObject* args) {
   2052   AssureWritable(self);
   2053   Py_RETURN_NONE;
   2054 }
   2055 
   2056 static PyObject* WhichOneof(CMessage* self, PyObject* arg) {
   2057   Py_ssize_t name_size;
   2058   char *name_data;
   2059   if (PyString_AsStringAndSize(arg, &name_data, &name_size) < 0)
   2060     return NULL;
   2061   string oneof_name = string(name_data, name_size);
   2062   const OneofDescriptor* oneof_desc =
   2063       self->message->GetDescriptor()->FindOneofByName(oneof_name);
   2064   if (oneof_desc == NULL) {
   2065     PyErr_Format(PyExc_ValueError,
   2066                  "Protocol message has no oneof \"%s\" field.",
   2067                  oneof_name.c_str());
   2068     return NULL;
   2069   }
   2070   const FieldDescriptor* field_in_oneof =
   2071       self->message->GetReflection()->GetOneofFieldDescriptor(
   2072           *self->message, oneof_desc);
   2073   if (field_in_oneof == NULL) {
   2074     Py_RETURN_NONE;
   2075   } else {
   2076     const string& name = field_in_oneof->name();
   2077     return PyString_FromStringAndSize(name.c_str(), name.size());
   2078   }
   2079 }
   2080 
   2081 static PyObject* GetExtensionDict(CMessage* self, void *closure);
   2082 
   2083 static PyObject* ListFields(CMessage* self) {
   2084   vector<const FieldDescriptor*> fields;
   2085   self->message->GetReflection()->ListFields(*self->message, &fields);
   2086 
   2087   // Normally, the list will be exactly the size of the fields.
   2088   ScopedPyObjectPtr all_fields(PyList_New(fields.size()));
   2089   if (all_fields == NULL) {
   2090     return NULL;
   2091   }
   2092 
   2093   // When there are unknown extensions, the py list will *not* contain
   2094   // the field information.  Thus the actual size of the py list will be
   2095   // smaller than the size of fields.  Set the actual size at the end.
   2096   Py_ssize_t actual_size = 0;
   2097   for (size_t i = 0; i < fields.size(); ++i) {
   2098     ScopedPyObjectPtr t(PyTuple_New(2));
   2099     if (t == NULL) {
   2100       return NULL;
   2101     }
   2102 
   2103     if (fields[i]->is_extension()) {
   2104       ScopedPyObjectPtr extension_field(
   2105           PyFieldDescriptor_FromDescriptor(fields[i]));
   2106       if (extension_field == NULL) {
   2107         return NULL;
   2108       }
   2109       // With C++ descriptors, the field can always be retrieved, but for
   2110       // unknown extensions which have not been imported in Python code, there
   2111       // is no message class and we cannot retrieve the value.
   2112       // TODO(amauryfa): consider building the class on the fly!
   2113       if (fields[i]->message_type() != NULL &&
   2114           cdescriptor_pool::GetMessageClass(
   2115               GetDescriptorPoolForMessage(self),
   2116               fields[i]->message_type()) == NULL) {
   2117         PyErr_Clear();
   2118         continue;
   2119       }
   2120       ScopedPyObjectPtr extensions(GetExtensionDict(self, NULL));
   2121       if (extensions == NULL) {
   2122         return NULL;
   2123       }
   2124       // 'extension' reference later stolen by PyTuple_SET_ITEM.
   2125       PyObject* extension = PyObject_GetItem(
   2126           extensions.get(), extension_field.get());
   2127       if (extension == NULL) {
   2128         return NULL;
   2129       }
   2130       PyTuple_SET_ITEM(t.get(), 0, extension_field.release());
   2131       // Steals reference to 'extension'
   2132       PyTuple_SET_ITEM(t.get(), 1, extension);
   2133     } else {
   2134       // Normal field
   2135       const string& field_name = fields[i]->name();
   2136       ScopedPyObjectPtr py_field_name(PyString_FromStringAndSize(
   2137           field_name.c_str(), field_name.length()));
   2138       if (py_field_name == NULL) {
   2139         PyErr_SetString(PyExc_ValueError, "bad string");
   2140         return NULL;
   2141       }
   2142       ScopedPyObjectPtr field_descriptor(
   2143           PyFieldDescriptor_FromDescriptor(fields[i]));
   2144       if (field_descriptor == NULL) {
   2145         return NULL;
   2146       }
   2147 
   2148       PyObject* field_value = GetAttr(self, py_field_name.get());
   2149       if (field_value == NULL) {
   2150         PyErr_SetObject(PyExc_ValueError, py_field_name.get());
   2151         return NULL;
   2152       }
   2153       PyTuple_SET_ITEM(t.get(), 0, field_descriptor.release());
   2154       PyTuple_SET_ITEM(t.get(), 1, field_value);
   2155     }
   2156     PyList_SET_ITEM(all_fields.get(), actual_size, t.release());
   2157     ++actual_size;
   2158   }
   2159   if (static_cast<size_t>(actual_size) != fields.size() &&
   2160       (PyList_SetSlice(all_fields.get(), actual_size, fields.size(), NULL) <
   2161        0)) {
   2162     return NULL;
   2163   }
   2164   return all_fields.release();
   2165 }
   2166 
   2167 static PyObject* DiscardUnknownFields(CMessage* self) {
   2168   AssureWritable(self);
   2169   self->message->DiscardUnknownFields();
   2170   Py_RETURN_NONE;
   2171 }
   2172 
   2173 PyObject* FindInitializationErrors(CMessage* self) {
   2174   Message* message = self->message;
   2175   vector<string> errors;
   2176   message->FindInitializationErrors(&errors);
   2177 
   2178   PyObject* error_list = PyList_New(errors.size());
   2179   if (error_list == NULL) {
   2180     return NULL;
   2181   }
   2182   for (size_t i = 0; i < errors.size(); ++i) {
   2183     const string& error = errors[i];
   2184     PyObject* error_string = PyString_FromStringAndSize(
   2185         error.c_str(), error.length());
   2186     if (error_string == NULL) {
   2187       Py_DECREF(error_list);
   2188       return NULL;
   2189     }
   2190     PyList_SET_ITEM(error_list, i, error_string);
   2191   }
   2192   return error_list;
   2193 }
   2194 
   2195 static PyObject* RichCompare(CMessage* self, PyObject* other, int opid) {
   2196   // Only equality comparisons are implemented.
   2197   if (opid != Py_EQ && opid != Py_NE) {
   2198     Py_INCREF(Py_NotImplemented);
   2199     return Py_NotImplemented;
   2200   }
   2201   bool equals = true;
   2202   // If other is not a message, it cannot be equal.
   2203   if (!PyObject_TypeCheck(other, &CMessage_Type)) {
   2204     equals = false;
   2205   }
   2206   const google::protobuf::Message* other_message =
   2207       reinterpret_cast<CMessage*>(other)->message;
   2208   // If messages don't have the same descriptors, they are not equal.
   2209   if (equals &&
   2210       self->message->GetDescriptor() != other_message->GetDescriptor()) {
   2211     equals = false;
   2212   }
   2213   // Check the message contents.
   2214   if (equals && !google::protobuf::util::MessageDifferencer::Equals(
   2215           *self->message,
   2216           *reinterpret_cast<CMessage*>(other)->message)) {
   2217     equals = false;
   2218   }
   2219   if (equals ^ (opid == Py_EQ)) {
   2220     Py_RETURN_FALSE;
   2221   } else {
   2222     Py_RETURN_TRUE;
   2223   }
   2224 }
   2225 
   2226 PyObject* InternalGetScalar(const Message* message,
   2227                             const FieldDescriptor* field_descriptor) {
   2228   const Reflection* reflection = message->GetReflection();
   2229 
   2230   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
   2231     return NULL;
   2232   }
   2233 
   2234   PyObject* result = NULL;
   2235   switch (field_descriptor->cpp_type()) {
   2236     case FieldDescriptor::CPPTYPE_INT32: {
   2237       int32 value = reflection->GetInt32(*message, field_descriptor);
   2238       result = PyInt_FromLong(value);
   2239       break;
   2240     }
   2241     case FieldDescriptor::CPPTYPE_INT64: {
   2242       int64 value = reflection->GetInt64(*message, field_descriptor);
   2243       result = PyLong_FromLongLong(value);
   2244       break;
   2245     }
   2246     case FieldDescriptor::CPPTYPE_UINT32: {
   2247       uint32 value = reflection->GetUInt32(*message, field_descriptor);
   2248       result = PyInt_FromSize_t(value);
   2249       break;
   2250     }
   2251     case FieldDescriptor::CPPTYPE_UINT64: {
   2252       uint64 value = reflection->GetUInt64(*message, field_descriptor);
   2253       result = PyLong_FromUnsignedLongLong(value);
   2254       break;
   2255     }
   2256     case FieldDescriptor::CPPTYPE_FLOAT: {
   2257       float value = reflection->GetFloat(*message, field_descriptor);
   2258       result = PyFloat_FromDouble(value);
   2259       break;
   2260     }
   2261     case FieldDescriptor::CPPTYPE_DOUBLE: {
   2262       double value = reflection->GetDouble(*message, field_descriptor);
   2263       result = PyFloat_FromDouble(value);
   2264       break;
   2265     }
   2266     case FieldDescriptor::CPPTYPE_BOOL: {
   2267       bool value = reflection->GetBool(*message, field_descriptor);
   2268       result = PyBool_FromLong(value);
   2269       break;
   2270     }
   2271     case FieldDescriptor::CPPTYPE_STRING: {
   2272       string value = reflection->GetString(*message, field_descriptor);
   2273       result = ToStringObject(field_descriptor, value);
   2274       break;
   2275     }
   2276     case FieldDescriptor::CPPTYPE_ENUM: {
   2277       if (!message->GetReflection()->SupportsUnknownEnumValues() &&
   2278           !message->GetReflection()->HasField(*message, field_descriptor)) {
   2279         // Look for the value in the unknown fields.
   2280         const UnknownFieldSet& unknown_field_set =
   2281             message->GetReflection()->GetUnknownFields(*message);
   2282         for (int i = 0; i < unknown_field_set.field_count(); ++i) {
   2283           if (unknown_field_set.field(i).number() ==
   2284               field_descriptor->number() &&
   2285               unknown_field_set.field(i).type() ==
   2286               google::protobuf::UnknownField::TYPE_VARINT) {
   2287             result = PyInt_FromLong(unknown_field_set.field(i).varint());
   2288             break;
   2289           }
   2290         }
   2291       }
   2292 
   2293       if (result == NULL) {
   2294         const EnumValueDescriptor* enum_value =
   2295             message->GetReflection()->GetEnum(*message, field_descriptor);
   2296         result = PyInt_FromLong(enum_value->number());
   2297       }
   2298       break;
   2299     }
   2300     default:
   2301       PyErr_Format(
   2302           PyExc_SystemError, "Getting a value from a field of unknown type %d",
   2303           field_descriptor->cpp_type());
   2304   }
   2305 
   2306   return result;
   2307 }
   2308 
   2309 PyObject* InternalGetSubMessage(
   2310     CMessage* self, const FieldDescriptor* field_descriptor) {
   2311   const Reflection* reflection = self->message->GetReflection();
   2312   PyDescriptorPool* pool = GetDescriptorPoolForMessage(self);
   2313   const Message& sub_message = reflection->GetMessage(
   2314       *self->message, field_descriptor, pool->message_factory);
   2315 
   2316   CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
   2317       pool, field_descriptor->message_type());
   2318   if (message_class == NULL) {
   2319     return NULL;
   2320   }
   2321 
   2322   CMessage* cmsg = cmessage::NewEmptyMessage(message_class);
   2323   if (cmsg == NULL) {
   2324     return NULL;
   2325   }
   2326 
   2327   cmsg->owner = self->owner;
   2328   cmsg->parent = self;
   2329   cmsg->parent_field_descriptor = field_descriptor;
   2330   cmsg->read_only = !reflection->HasField(*self->message, field_descriptor);
   2331   cmsg->message = const_cast<Message*>(&sub_message);
   2332 
   2333   return reinterpret_cast<PyObject*>(cmsg);
   2334 }
   2335 
   2336 int InternalSetNonOneofScalar(
   2337     Message* message,
   2338     const FieldDescriptor* field_descriptor,
   2339     PyObject* arg) {
   2340   const Reflection* reflection = message->GetReflection();
   2341 
   2342   if (!CheckFieldBelongsToMessage(field_descriptor, message)) {
   2343     return -1;
   2344   }
   2345 
   2346   switch (field_descriptor->cpp_type()) {
   2347     case FieldDescriptor::CPPTYPE_INT32: {
   2348       GOOGLE_CHECK_GET_INT32(arg, value, -1);
   2349       reflection->SetInt32(message, field_descriptor, value);
   2350       break;
   2351     }
   2352     case FieldDescriptor::CPPTYPE_INT64: {
   2353       GOOGLE_CHECK_GET_INT64(arg, value, -1);
   2354       reflection->SetInt64(message, field_descriptor, value);
   2355       break;
   2356     }
   2357     case FieldDescriptor::CPPTYPE_UINT32: {
   2358       GOOGLE_CHECK_GET_UINT32(arg, value, -1);
   2359       reflection->SetUInt32(message, field_descriptor, value);
   2360       break;
   2361     }
   2362     case FieldDescriptor::CPPTYPE_UINT64: {
   2363       GOOGLE_CHECK_GET_UINT64(arg, value, -1);
   2364       reflection->SetUInt64(message, field_descriptor, value);
   2365       break;
   2366     }
   2367     case FieldDescriptor::CPPTYPE_FLOAT: {
   2368       GOOGLE_CHECK_GET_FLOAT(arg, value, -1);
   2369       reflection->SetFloat(message, field_descriptor, value);
   2370       break;
   2371     }
   2372     case FieldDescriptor::CPPTYPE_DOUBLE: {
   2373       GOOGLE_CHECK_GET_DOUBLE(arg, value, -1);
   2374       reflection->SetDouble(message, field_descriptor, value);
   2375       break;
   2376     }
   2377     case FieldDescriptor::CPPTYPE_BOOL: {
   2378       GOOGLE_CHECK_GET_BOOL(arg, value, -1);
   2379       reflection->SetBool(message, field_descriptor, value);
   2380       break;
   2381     }
   2382     case FieldDescriptor::CPPTYPE_STRING: {
   2383       if (!CheckAndSetString(
   2384           arg, message, field_descriptor, reflection, false, -1)) {
   2385         return -1;
   2386       }
   2387       break;
   2388     }
   2389     case FieldDescriptor::CPPTYPE_ENUM: {
   2390       GOOGLE_CHECK_GET_INT32(arg, value, -1);
   2391       if (reflection->SupportsUnknownEnumValues()) {
   2392         reflection->SetEnumValue(message, field_descriptor, value);
   2393       } else {
   2394         const EnumDescriptor* enum_descriptor = field_descriptor->enum_type();
   2395         const EnumValueDescriptor* enum_value =
   2396             enum_descriptor->FindValueByNumber(value);
   2397         if (enum_value != NULL) {
   2398           reflection->SetEnum(message, field_descriptor, enum_value);
   2399         } else {
   2400           PyErr_Format(PyExc_ValueError, "Unknown enum value: %d", value);
   2401           return -1;
   2402         }
   2403       }
   2404       break;
   2405     }
   2406     default:
   2407       PyErr_Format(
   2408           PyExc_SystemError, "Setting value to a field of unknown type %d",
   2409           field_descriptor->cpp_type());
   2410       return -1;
   2411   }
   2412 
   2413   return 0;
   2414 }
   2415 
   2416 int InternalSetScalar(
   2417     CMessage* self,
   2418     const FieldDescriptor* field_descriptor,
   2419     PyObject* arg) {
   2420   if (!CheckFieldBelongsToMessage(field_descriptor, self->message)) {
   2421     return -1;
   2422   }
   2423 
   2424   if (MaybeReleaseOverlappingOneofField(self, field_descriptor) < 0) {
   2425     return -1;
   2426   }
   2427 
   2428   return InternalSetNonOneofScalar(self->message, field_descriptor, arg);
   2429 }
   2430 
   2431 PyObject* FromString(PyTypeObject* cls, PyObject* serialized) {
   2432   PyObject* py_cmsg = PyObject_CallObject(
   2433       reinterpret_cast<PyObject*>(cls), NULL);
   2434   if (py_cmsg == NULL) {
   2435     return NULL;
   2436   }
   2437   CMessage* cmsg = reinterpret_cast<CMessage*>(py_cmsg);
   2438 
   2439   ScopedPyObjectPtr py_length(MergeFromString(cmsg, serialized));
   2440   if (py_length == NULL) {
   2441     Py_DECREF(py_cmsg);
   2442     return NULL;
   2443   }
   2444 
   2445   return py_cmsg;
   2446 }
   2447 
   2448 PyObject* DeepCopy(CMessage* self, PyObject* arg) {
   2449   PyObject* clone = PyObject_CallObject(
   2450       reinterpret_cast<PyObject*>(Py_TYPE(self)), NULL);
   2451   if (clone == NULL) {
   2452     return NULL;
   2453   }
   2454   if (!PyObject_TypeCheck(clone, &CMessage_Type)) {
   2455     Py_DECREF(clone);
   2456     return NULL;
   2457   }
   2458   if (ScopedPyObjectPtr(MergeFrom(
   2459           reinterpret_cast<CMessage*>(clone),
   2460           reinterpret_cast<PyObject*>(self))) == NULL) {
   2461     Py_DECREF(clone);
   2462     return NULL;
   2463   }
   2464   return clone;
   2465 }
   2466 
   2467 PyObject* ToUnicode(CMessage* self) {
   2468   // Lazy import to prevent circular dependencies
   2469   ScopedPyObjectPtr text_format(
   2470       PyImport_ImportModule("google.protobuf.text_format"));
   2471   if (text_format == NULL) {
   2472     return NULL;
   2473   }
   2474   ScopedPyObjectPtr method_name(PyString_FromString("MessageToString"));
   2475   if (method_name == NULL) {
   2476     return NULL;
   2477   }
   2478   Py_INCREF(Py_True);
   2479   ScopedPyObjectPtr encoded(PyObject_CallMethodObjArgs(
   2480       text_format.get(), method_name.get(), self, Py_True, NULL));
   2481   Py_DECREF(Py_True);
   2482   if (encoded == NULL) {
   2483     return NULL;
   2484   }
   2485 #if PY_MAJOR_VERSION < 3
   2486   PyObject* decoded = PyString_AsDecodedObject(encoded.get(), "utf-8", NULL);
   2487 #else
   2488   PyObject* decoded = PyUnicode_FromEncodedObject(encoded.get(), "utf-8", NULL);
   2489 #endif
   2490   if (decoded == NULL) {
   2491     return NULL;
   2492   }
   2493   return decoded;
   2494 }
   2495 
   2496 PyObject* Reduce(CMessage* self) {
   2497   ScopedPyObjectPtr constructor(reinterpret_cast<PyObject*>(Py_TYPE(self)));
   2498   constructor.inc();
   2499   ScopedPyObjectPtr args(PyTuple_New(0));
   2500   if (args == NULL) {
   2501     return NULL;
   2502   }
   2503   ScopedPyObjectPtr state(PyDict_New());
   2504   if (state == NULL) {
   2505     return  NULL;
   2506   }
   2507   ScopedPyObjectPtr serialized(SerializePartialToString(self));
   2508   if (serialized == NULL) {
   2509     return NULL;
   2510   }
   2511   if (PyDict_SetItemString(state.get(), "serialized", serialized.get()) < 0) {
   2512     return NULL;
   2513   }
   2514   return Py_BuildValue("OOO", constructor.get(), args.get(), state.get());
   2515 }
   2516 
   2517 PyObject* SetState(CMessage* self, PyObject* state) {
   2518   if (!PyDict_Check(state)) {
   2519     PyErr_SetString(PyExc_TypeError, "state not a dict");
   2520     return NULL;
   2521   }
   2522   PyObject* serialized = PyDict_GetItemString(state, "serialized");
   2523   if (serialized == NULL) {
   2524     return NULL;
   2525   }
   2526   if (ScopedPyObjectPtr(ParseFromString(self, serialized)) == NULL) {
   2527     return NULL;
   2528   }
   2529   Py_RETURN_NONE;
   2530 }
   2531 
   2532 // CMessage static methods:
   2533 PyObject* _CheckCalledFromGeneratedFile(PyObject* unused,
   2534                                         PyObject* unused_arg) {
   2535   if (!_CalledFromGeneratedFile(1)) {
   2536     PyErr_SetString(PyExc_TypeError,
   2537                     "Descriptors should not be created directly, "
   2538                     "but only retrieved from their parent.");
   2539     return NULL;
   2540   }
   2541   Py_RETURN_NONE;
   2542 }
   2543 
   2544 static PyObject* GetExtensionDict(CMessage* self, void *closure) {
   2545   if (self->extensions)  {
   2546     Py_INCREF(self->extensions);
   2547     return reinterpret_cast<PyObject*>(self->extensions);
   2548   }
   2549 
   2550   // If there are extension_ranges, the message is "extendable". Allocate a
   2551   // dictionary to store the extension fields.
   2552   const Descriptor* descriptor = GetMessageDescriptor(Py_TYPE(self));
   2553   if (descriptor->extension_range_count() > 0) {
   2554     ExtensionDict* extension_dict = extension_dict::NewExtensionDict(self);
   2555     if (extension_dict == NULL) {
   2556       return NULL;
   2557     }
   2558     self->extensions = extension_dict;
   2559     Py_INCREF(self->extensions);
   2560     return reinterpret_cast<PyObject*>(self->extensions);
   2561   }
   2562 
   2563   PyErr_SetNone(PyExc_AttributeError);
   2564   return NULL;
   2565 }
   2566 
   2567 static PyGetSetDef Getters[] = {
   2568   {"Extensions", (getter)GetExtensionDict, NULL, "Extension dict"},
   2569   {NULL}
   2570 };
   2571 
   2572 static PyMethodDef Methods[] = {
   2573   { "__deepcopy__", (PyCFunction)DeepCopy, METH_VARARGS,
   2574     "Makes a deep copy of the class." },
   2575   { "__reduce__", (PyCFunction)Reduce, METH_NOARGS,
   2576     "Outputs picklable representation of the message." },
   2577   { "__setstate__", (PyCFunction)SetState, METH_O,
   2578     "Inputs picklable representation of the message." },
   2579   { "__unicode__", (PyCFunction)ToUnicode, METH_NOARGS,
   2580     "Outputs a unicode representation of the message." },
   2581   { "ByteSize", (PyCFunction)ByteSize, METH_NOARGS,
   2582     "Returns the size of the message in bytes." },
   2583   { "Clear", (PyCFunction)Clear, METH_NOARGS,
   2584     "Clears the message." },
   2585   { "ClearExtension", (PyCFunction)ClearExtension, METH_O,
   2586     "Clears a message field." },
   2587   { "ClearField", (PyCFunction)ClearField, METH_O,
   2588     "Clears a message field." },
   2589   { "CopyFrom", (PyCFunction)CopyFrom, METH_O,
   2590     "Copies a protocol message into the current message." },
   2591   { "DiscardUnknownFields", (PyCFunction)DiscardUnknownFields, METH_NOARGS,
   2592     "Discards the unknown fields." },
   2593   { "FindInitializationErrors", (PyCFunction)FindInitializationErrors,
   2594     METH_NOARGS,
   2595     "Finds unset required fields." },
   2596   { "FromString", (PyCFunction)FromString, METH_O | METH_CLASS,
   2597     "Creates new method instance from given serialized data." },
   2598   { "HasExtension", (PyCFunction)HasExtension, METH_O,
   2599     "Checks if a message field is set." },
   2600   { "HasField", (PyCFunction)HasField, METH_O,
   2601     "Checks if a message field is set." },
   2602   { "IsInitialized", (PyCFunction)IsInitialized, METH_VARARGS,
   2603     "Checks if all required fields of a protocol message are set." },
   2604   { "ListFields", (PyCFunction)ListFields, METH_NOARGS,
   2605     "Lists all set fields of a message." },
   2606   { "MergeFrom", (PyCFunction)MergeFrom, METH_O,
   2607     "Merges a protocol message into the current message." },
   2608   { "MergeFromString", (PyCFunction)MergeFromString, METH_O,
   2609     "Merges a serialized message into the current message." },
   2610   { "ParseFromString", (PyCFunction)ParseFromString, METH_O,
   2611     "Parses a serialized message into the current message." },
   2612   { "RegisterExtension", (PyCFunction)RegisterExtension, METH_O | METH_CLASS,
   2613     "Registers an extension with the current message." },
   2614   { "SerializePartialToString", (PyCFunction)SerializePartialToString,
   2615     METH_NOARGS,
   2616     "Serializes the message to a string, even if it isn't initialized." },
   2617   { "SerializeToString", (PyCFunction)SerializeToString, METH_NOARGS,
   2618     "Serializes the message to a string, only for initialized messages." },
   2619   { "SetInParent", (PyCFunction)SetInParent, METH_NOARGS,
   2620     "Sets the has bit of the given field in its parent message." },
   2621   { "WhichOneof", (PyCFunction)WhichOneof, METH_O,
   2622     "Returns the name of the field set inside a oneof, "
   2623     "or None if no field is set." },
   2624 
   2625   // Static Methods.
   2626   { "_CheckCalledFromGeneratedFile", (PyCFunction)_CheckCalledFromGeneratedFile,
   2627     METH_NOARGS | METH_STATIC,
   2628     "Raises TypeError if the caller is not in a _pb2.py file."},
   2629   { NULL, NULL}
   2630 };
   2631 
   2632 static bool SetCompositeField(
   2633     CMessage* self, PyObject* name, PyObject* value) {
   2634   if (self->composite_fields == NULL) {
   2635     self->composite_fields = PyDict_New();
   2636     if (self->composite_fields == NULL) {
   2637       return false;
   2638     }
   2639   }
   2640   return PyDict_SetItem(self->composite_fields, name, value) == 0;
   2641 }
   2642 
   2643 PyObject* GetAttr(CMessage* self, PyObject* name) {
   2644   PyObject* value = self->composite_fields ?
   2645       PyDict_GetItem(self->composite_fields, name) : NULL;
   2646   if (value != NULL) {
   2647     Py_INCREF(value);
   2648     return value;
   2649   }
   2650 
   2651   const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
   2652   if (field_descriptor == NULL) {
   2653     return CMessage_Type.tp_base->tp_getattro(
   2654         reinterpret_cast<PyObject*>(self), name);
   2655   }
   2656 
   2657   if (field_descriptor->is_map()) {
   2658     PyObject* py_container = NULL;
   2659     const Descriptor* entry_type = field_descriptor->message_type();
   2660     const FieldDescriptor* value_type = entry_type->FindFieldByName("value");
   2661     if (value_type->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   2662       CMessageClass* value_class = cdescriptor_pool::GetMessageClass(
   2663           GetDescriptorPoolForMessage(self), value_type->message_type());
   2664       if (value_class == NULL) {
   2665         return NULL;
   2666       }
   2667       py_container =
   2668           NewMessageMapContainer(self, field_descriptor, value_class);
   2669     } else {
   2670       py_container = NewScalarMapContainer(self, field_descriptor);
   2671     }
   2672     if (py_container == NULL) {
   2673       return NULL;
   2674     }
   2675     if (!SetCompositeField(self, name, py_container)) {
   2676       Py_DECREF(py_container);
   2677       return NULL;
   2678     }
   2679     return py_container;
   2680   }
   2681 
   2682   if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
   2683     PyObject* py_container = NULL;
   2684     if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   2685       CMessageClass* message_class = cdescriptor_pool::GetMessageClass(
   2686           GetDescriptorPoolForMessage(self), field_descriptor->message_type());
   2687       if (message_class == NULL) {
   2688         return NULL;
   2689       }
   2690       py_container = repeated_composite_container::NewContainer(
   2691           self, field_descriptor, message_class);
   2692     } else {
   2693       py_container = repeated_scalar_container::NewContainer(
   2694           self, field_descriptor);
   2695     }
   2696     if (py_container == NULL) {
   2697       return NULL;
   2698     }
   2699     if (!SetCompositeField(self, name, py_container)) {
   2700       Py_DECREF(py_container);
   2701       return NULL;
   2702     }
   2703     return py_container;
   2704   }
   2705 
   2706   if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   2707     PyObject* sub_message = InternalGetSubMessage(self, field_descriptor);
   2708     if (sub_message == NULL) {
   2709       return NULL;
   2710     }
   2711     if (!SetCompositeField(self, name, sub_message)) {
   2712       Py_DECREF(sub_message);
   2713       return NULL;
   2714     }
   2715     return sub_message;
   2716   }
   2717 
   2718   return InternalGetScalar(self->message, field_descriptor);
   2719 }
   2720 
   2721 int SetAttr(CMessage* self, PyObject* name, PyObject* value) {
   2722   if (self->composite_fields && PyDict_Contains(self->composite_fields, name)) {
   2723     PyErr_SetString(PyExc_TypeError, "Can't set composite field");
   2724     return -1;
   2725   }
   2726 
   2727   const FieldDescriptor* field_descriptor = GetFieldDescriptor(self, name);
   2728   if (field_descriptor != NULL) {
   2729     AssureWritable(self);
   2730     if (field_descriptor->label() == FieldDescriptor::LABEL_REPEATED) {
   2731       PyErr_Format(PyExc_AttributeError, "Assignment not allowed to repeated "
   2732                    "field \"%s\" in protocol message object.",
   2733                    field_descriptor->name().c_str());
   2734       return -1;
   2735     } else {
   2736       if (field_descriptor->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
   2737         PyErr_Format(PyExc_AttributeError, "Assignment not allowed to "
   2738                      "field \"%s\" in protocol message object.",
   2739                      field_descriptor->name().c_str());
   2740         return -1;
   2741       } else {
   2742         return InternalSetScalar(self, field_descriptor, value);
   2743       }
   2744     }
   2745   }
   2746 
   2747   PyErr_Format(PyExc_AttributeError,
   2748                "Assignment not allowed "
   2749                "(no field \"%s\" in protocol message object).",
   2750                PyString_AsString(name));
   2751   return -1;
   2752 }
   2753 
   2754 }  // namespace cmessage
   2755 
   2756 PyTypeObject CMessage_Type = {
   2757   PyVarObject_HEAD_INIT(&CMessageClass_Type, 0)
   2758   FULL_MODULE_NAME ".CMessage",        // tp_name
   2759   sizeof(CMessage),                    // tp_basicsize
   2760   0,                                   //  tp_itemsize
   2761   (destructor)cmessage::Dealloc,       //  tp_dealloc
   2762   0,                                   //  tp_print
   2763   0,                                   //  tp_getattr
   2764   0,                                   //  tp_setattr
   2765   0,                                   //  tp_compare
   2766   (reprfunc)cmessage::ToStr,           //  tp_repr
   2767   0,                                   //  tp_as_number
   2768   0,                                   //  tp_as_sequence
   2769   0,                                   //  tp_as_mapping
   2770   PyObject_HashNotImplemented,         //  tp_hash
   2771   0,                                   //  tp_call
   2772   (reprfunc)cmessage::ToStr,           //  tp_str
   2773   (getattrofunc)cmessage::GetAttr,     //  tp_getattro
   2774   (setattrofunc)cmessage::SetAttr,     //  tp_setattro
   2775   0,                                   //  tp_as_buffer
   2776   Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  //  tp_flags
   2777   "A ProtocolMessage",                 //  tp_doc
   2778   0,                                   //  tp_traverse
   2779   0,                                   //  tp_clear
   2780   (richcmpfunc)cmessage::RichCompare,  //  tp_richcompare
   2781   0,                                   //  tp_weaklistoffset
   2782   0,                                   //  tp_iter
   2783   0,                                   //  tp_iternext
   2784   cmessage::Methods,                   //  tp_methods
   2785   0,                                   //  tp_members
   2786   cmessage::Getters,                   //  tp_getset
   2787   0,                                   //  tp_base
   2788   0,                                   //  tp_dict
   2789   0,                                   //  tp_descr_get
   2790   0,                                   //  tp_descr_set
   2791   0,                                   //  tp_dictoffset
   2792   (initproc)cmessage::Init,            //  tp_init
   2793   0,                                   //  tp_alloc
   2794   cmessage::New,                       //  tp_new
   2795 };
   2796 
   2797 // --- Exposing the C proto living inside Python proto to C code:
   2798 
   2799 const Message* (*GetCProtoInsidePyProtoPtr)(PyObject* msg);
   2800 Message* (*MutableCProtoInsidePyProtoPtr)(PyObject* msg);
   2801 
   2802 static const Message* GetCProtoInsidePyProtoImpl(PyObject* msg) {
   2803   if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
   2804     return NULL;
   2805   }
   2806   CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
   2807   return cmsg->message;
   2808 }
   2809 
   2810 static Message* MutableCProtoInsidePyProtoImpl(PyObject* msg) {
   2811   if (!PyObject_TypeCheck(msg, &CMessage_Type)) {
   2812     return NULL;
   2813   }
   2814   CMessage* cmsg = reinterpret_cast<CMessage*>(msg);
   2815   if ((cmsg->composite_fields && PyDict_Size(cmsg->composite_fields) != 0) ||
   2816       (cmsg->extensions != NULL &&
   2817        PyDict_Size(cmsg->extensions->values) != 0)) {
   2818     // There is currently no way of accurately syncing arbitrary changes to
   2819     // the underlying C++ message back to the CMessage (e.g. removed repeated
   2820     // composite containers). We only allow direct mutation of the underlying
   2821     // C++ message if there is no child data in the CMessage.
   2822     return NULL;
   2823   }
   2824   cmessage::AssureWritable(cmsg);
   2825   return cmsg->message;
   2826 }
   2827 
   2828 static const char module_docstring[] =
   2829 "python-proto2 is a module that can be used to enhance proto2 Python API\n"
   2830 "performance.\n"
   2831 "\n"
   2832 "It provides access to the protocol buffers C++ reflection API that\n"
   2833 "implements the basic protocol buffer functions.";
   2834 
   2835 void InitGlobals() {
   2836   // TODO(gps): Check all return values in this function for NULL and propagate
   2837   // the error (MemoryError) on up to result in an import failure.  These should
   2838   // also be freed and reset to NULL during finalization.
   2839   kPythonZero = PyInt_FromLong(0);
   2840   kint32min_py = PyInt_FromLong(kint32min);
   2841   kint32max_py = PyInt_FromLong(kint32max);
   2842   kuint32max_py = PyLong_FromLongLong(kuint32max);
   2843   kint64min_py = PyLong_FromLongLong(kint64min);
   2844   kint64max_py = PyLong_FromLongLong(kint64max);
   2845   kuint64max_py = PyLong_FromUnsignedLongLong(kuint64max);
   2846 
   2847   kDESCRIPTOR = PyString_FromString("DESCRIPTOR");
   2848   k_cdescriptor = PyString_FromString("_cdescriptor");
   2849   kfull_name = PyString_FromString("full_name");
   2850   k_extensions_by_name = PyString_FromString("_extensions_by_name");
   2851   k_extensions_by_number = PyString_FromString("_extensions_by_number");
   2852 
   2853   PyObject *dummy_obj = PySet_New(NULL);
   2854   kEmptyWeakref = PyWeakref_NewRef(dummy_obj, NULL);
   2855   Py_DECREF(dummy_obj);
   2856 }
   2857 
   2858 bool InitProto2MessageModule(PyObject *m) {
   2859   // Initialize types and globals in descriptor.cc
   2860   if (!InitDescriptor()) {
   2861     return false;
   2862   }
   2863 
   2864   // Initialize types and globals in descriptor_pool.cc
   2865   if (!InitDescriptorPool()) {
   2866     return false;
   2867   }
   2868 
   2869   // Initialize constants defined in this file.
   2870   InitGlobals();
   2871 
   2872   CMessageClass_Type.tp_base = &PyType_Type;
   2873   if (PyType_Ready(&CMessageClass_Type) < 0) {
   2874     return false;
   2875   }
   2876   PyModule_AddObject(m, "MessageMeta",
   2877                      reinterpret_cast<PyObject*>(&CMessageClass_Type));
   2878 
   2879   if (PyType_Ready(&CMessage_Type) < 0) {
   2880     return false;
   2881   }
   2882 
   2883   // DESCRIPTOR is set on each protocol buffer message class elsewhere, but set
   2884   // it here as well to document that subclasses need to set it.
   2885   PyDict_SetItem(CMessage_Type.tp_dict, kDESCRIPTOR, Py_None);
   2886   // Subclasses with message extensions will override _extensions_by_name and
   2887   // _extensions_by_number with fresh mutable dictionaries in AddDescriptors.
   2888   // All other classes can share this same immutable mapping.
   2889   ScopedPyObjectPtr empty_dict(PyDict_New());
   2890   if (empty_dict == NULL) {
   2891     return false;
   2892   }
   2893   ScopedPyObjectPtr immutable_dict(PyDictProxy_New(empty_dict.get()));
   2894   if (immutable_dict == NULL) {
   2895     return false;
   2896   }
   2897   if (PyDict_SetItem(CMessage_Type.tp_dict,
   2898                      k_extensions_by_name, immutable_dict.get()) < 0) {
   2899     return false;
   2900   }
   2901   if (PyDict_SetItem(CMessage_Type.tp_dict,
   2902                      k_extensions_by_number, immutable_dict.get()) < 0) {
   2903     return false;
   2904   }
   2905 
   2906   PyModule_AddObject(m, "Message", reinterpret_cast<PyObject*>(&CMessage_Type));
   2907 
   2908   // Initialize Repeated container types.
   2909   {
   2910     if (PyType_Ready(&RepeatedScalarContainer_Type) < 0) {
   2911       return false;
   2912     }
   2913 
   2914     PyModule_AddObject(m, "RepeatedScalarContainer",
   2915                        reinterpret_cast<PyObject*>(
   2916                            &RepeatedScalarContainer_Type));
   2917 
   2918     if (PyType_Ready(&RepeatedCompositeContainer_Type) < 0) {
   2919       return false;
   2920     }
   2921 
   2922     PyModule_AddObject(
   2923         m, "RepeatedCompositeContainer",
   2924         reinterpret_cast<PyObject*>(
   2925             &RepeatedCompositeContainer_Type));
   2926 
   2927     // Register them as collections.Sequence
   2928     ScopedPyObjectPtr collections(PyImport_ImportModule("collections"));
   2929     if (collections == NULL) {
   2930       return false;
   2931     }
   2932     ScopedPyObjectPtr mutable_sequence(
   2933         PyObject_GetAttrString(collections.get(), "MutableSequence"));
   2934     if (mutable_sequence == NULL) {
   2935       return false;
   2936     }
   2937     if (ScopedPyObjectPtr(
   2938             PyObject_CallMethod(mutable_sequence.get(), "register", "O",
   2939                                 &RepeatedScalarContainer_Type)) == NULL) {
   2940       return false;
   2941     }
   2942     if (ScopedPyObjectPtr(
   2943             PyObject_CallMethod(mutable_sequence.get(), "register", "O",
   2944                                 &RepeatedCompositeContainer_Type)) == NULL) {
   2945       return false;
   2946     }
   2947   }
   2948 
   2949   // Initialize Map container types.
   2950   {
   2951     // ScalarMapContainer_Type derives from our MutableMapping type.
   2952     ScopedPyObjectPtr containers(PyImport_ImportModule(
   2953         "google.protobuf.internal.containers"));
   2954     if (containers == NULL) {
   2955       return false;
   2956     }
   2957 
   2958     ScopedPyObjectPtr mutable_mapping(
   2959         PyObject_GetAttrString(containers.get(), "MutableMapping"));
   2960     if (mutable_mapping == NULL) {
   2961       return false;
   2962     }
   2963 
   2964     if (!PyObject_TypeCheck(mutable_mapping.get(), &PyType_Type)) {
   2965       return false;
   2966     }
   2967 
   2968     Py_INCREF(mutable_mapping.get());
   2969 #if PY_MAJOR_VERSION >= 3
   2970     PyObject* bases = PyTuple_New(1);
   2971     PyTuple_SET_ITEM(bases, 0, mutable_mapping.get());
   2972 
   2973     ScalarMapContainer_Type =
   2974         PyType_FromSpecWithBases(&ScalarMapContainer_Type_spec, bases);
   2975     PyModule_AddObject(m, "ScalarMapContainer", ScalarMapContainer_Type);
   2976 #else
   2977     ScalarMapContainer_Type.tp_base =
   2978         reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
   2979 
   2980     if (PyType_Ready(&ScalarMapContainer_Type) < 0) {
   2981       return false;
   2982     }
   2983 
   2984     PyModule_AddObject(m, "ScalarMapContainer",
   2985                        reinterpret_cast<PyObject*>(&ScalarMapContainer_Type));
   2986 #endif
   2987 
   2988     if (PyType_Ready(&MapIterator_Type) < 0) {
   2989       return false;
   2990     }
   2991 
   2992     PyModule_AddObject(m, "MapIterator",
   2993                        reinterpret_cast<PyObject*>(&MapIterator_Type));
   2994 
   2995 
   2996 #if PY_MAJOR_VERSION >= 3
   2997     MessageMapContainer_Type =
   2998         PyType_FromSpecWithBases(&MessageMapContainer_Type_spec, bases);
   2999     PyModule_AddObject(m, "MessageMapContainer", MessageMapContainer_Type);
   3000 #else
   3001     Py_INCREF(mutable_mapping.get());
   3002     MessageMapContainer_Type.tp_base =
   3003         reinterpret_cast<PyTypeObject*>(mutable_mapping.get());
   3004 
   3005     if (PyType_Ready(&MessageMapContainer_Type) < 0) {
   3006       return false;
   3007     }
   3008 
   3009     PyModule_AddObject(m, "MessageMapContainer",
   3010                        reinterpret_cast<PyObject*>(&MessageMapContainer_Type));
   3011 #endif
   3012   }
   3013 
   3014   if (PyType_Ready(&ExtensionDict_Type) < 0) {
   3015     return false;
   3016   }
   3017   PyModule_AddObject(
   3018       m, "ExtensionDict",
   3019       reinterpret_cast<PyObject*>(&ExtensionDict_Type));
   3020 
   3021   // Expose the DescriptorPool used to hold all descriptors added from generated
   3022   // pb2.py files.
   3023   // PyModule_AddObject steals a reference.
   3024   Py_INCREF(GetDefaultDescriptorPool());
   3025   PyModule_AddObject(m, "default_pool",
   3026                      reinterpret_cast<PyObject*>(GetDefaultDescriptorPool()));
   3027 
   3028   PyModule_AddObject(m, "DescriptorPool", reinterpret_cast<PyObject*>(
   3029       &PyDescriptorPool_Type));
   3030 
   3031   // This implementation provides full Descriptor types, we advertise it so that
   3032   // descriptor.py can use them in replacement of the Python classes.
   3033   PyModule_AddIntConstant(m, "_USE_C_DESCRIPTORS", 1);
   3034 
   3035   PyModule_AddObject(m, "Descriptor", reinterpret_cast<PyObject*>(
   3036       &PyMessageDescriptor_Type));
   3037   PyModule_AddObject(m, "FieldDescriptor", reinterpret_cast<PyObject*>(
   3038       &PyFieldDescriptor_Type));
   3039   PyModule_AddObject(m, "EnumDescriptor", reinterpret_cast<PyObject*>(
   3040       &PyEnumDescriptor_Type));
   3041   PyModule_AddObject(m, "EnumValueDescriptor", reinterpret_cast<PyObject*>(
   3042       &PyEnumValueDescriptor_Type));
   3043   PyModule_AddObject(m, "FileDescriptor", reinterpret_cast<PyObject*>(
   3044       &PyFileDescriptor_Type));
   3045   PyModule_AddObject(m, "OneofDescriptor", reinterpret_cast<PyObject*>(
   3046       &PyOneofDescriptor_Type));
   3047 
   3048   PyObject* enum_type_wrapper = PyImport_ImportModule(
   3049       "google.protobuf.internal.enum_type_wrapper");
   3050   if (enum_type_wrapper == NULL) {
   3051     return false;
   3052   }
   3053   EnumTypeWrapper_class =
   3054       PyObject_GetAttrString(enum_type_wrapper, "EnumTypeWrapper");
   3055   Py_DECREF(enum_type_wrapper);
   3056 
   3057   PyObject* message_module = PyImport_ImportModule(
   3058       "google.protobuf.message");
   3059   if (message_module == NULL) {
   3060     return false;
   3061   }
   3062   EncodeError_class = PyObject_GetAttrString(message_module, "EncodeError");
   3063   DecodeError_class = PyObject_GetAttrString(message_module, "DecodeError");
   3064   PythonMessage_class = PyObject_GetAttrString(message_module, "Message");
   3065   Py_DECREF(message_module);
   3066 
   3067   PyObject* pickle_module = PyImport_ImportModule("pickle");
   3068   if (pickle_module == NULL) {
   3069     return false;
   3070   }
   3071   PickleError_class = PyObject_GetAttrString(pickle_module, "PickleError");
   3072   Py_DECREF(pickle_module);
   3073 
   3074   // Override {Get,Mutable}CProtoInsidePyProto.
   3075   GetCProtoInsidePyProtoPtr = GetCProtoInsidePyProtoImpl;
   3076   MutableCProtoInsidePyProtoPtr = MutableCProtoInsidePyProtoImpl;
   3077 
   3078   return true;
   3079 }
   3080 
   3081 }  // namespace python
   3082 }  // namespace protobuf
   3083 
   3084 static PyMethodDef ModuleMethods[] = {
   3085   {"SetAllowOversizeProtos",
   3086     (PyCFunction)google::protobuf::python::cmessage::SetAllowOversizeProtos,
   3087     METH_O, "Enable/disable oversize proto parsing."},
   3088   { NULL, NULL}
   3089 };
   3090 
   3091 #if PY_MAJOR_VERSION >= 3
   3092 static struct PyModuleDef _module = {
   3093   PyModuleDef_HEAD_INIT,
   3094   "_message",
   3095   google::protobuf::python::module_docstring,
   3096   -1,
   3097   ModuleMethods,  /* m_methods */
   3098   NULL,
   3099   NULL,
   3100   NULL,
   3101   NULL
   3102 };
   3103 #define INITFUNC PyInit__message
   3104 #define INITFUNC_ERRORVAL NULL
   3105 #else  // Python 2
   3106 #define INITFUNC init_message
   3107 #define INITFUNC_ERRORVAL
   3108 #endif
   3109 
   3110 extern "C" {
   3111   PyMODINIT_FUNC INITFUNC(void) {
   3112     PyObject* m;
   3113 #if PY_MAJOR_VERSION >= 3
   3114     m = PyModule_Create(&_module);
   3115 #else
   3116     m = Py_InitModule3("_message", ModuleMethods,
   3117                        google::protobuf::python::module_docstring);
   3118 #endif
   3119     if (m == NULL) {
   3120       return INITFUNC_ERRORVAL;
   3121     }
   3122 
   3123     if (!google::protobuf::python::InitProto2MessageModule(m)) {
   3124       Py_DECREF(m);
   3125       return INITFUNC_ERRORVAL;
   3126     }
   3127 
   3128 #if PY_MAJOR_VERSION >= 3
   3129     return m;
   3130 #endif
   3131   }
   3132 }
   3133 }  // namespace google
   3134