Home | History | Annotate | Download | only in protobuf_c
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2014 Google Inc.  All rights reserved.
      3 // https://developers.google.com/protocol-buffers/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 #include "protobuf.h"
     32 
     33 // This function is equivalent to rb_str_cat(), but unlike the real
     34 // rb_str_cat(), it doesn't leak memory in some versions of Ruby.
     35 // For more information, see:
     36 //   https://bugs.ruby-lang.org/issues/11328
     37 VALUE noleak_rb_str_cat(VALUE rb_str, const char *str, long len) {
     38   char *p;
     39   size_t oldlen = RSTRING_LEN(rb_str);
     40   rb_str_modify_expand(rb_str, len);
     41   p = RSTRING_PTR(rb_str);
     42   memcpy(p + oldlen, str, len);
     43   rb_str_set_len(rb_str, oldlen + len);
     44   return rb_str;
     45 }
     46 
     47 // -----------------------------------------------------------------------------
     48 // Parsing.
     49 // -----------------------------------------------------------------------------
     50 
     51 #define DEREF(msg, ofs, type) *(type*)(((uint8_t *)msg) + ofs)
     52 
     53 // Creates a handlerdata that simply contains the offset for this field.
     54 static const void* newhandlerdata(upb_handlers* h, uint32_t ofs) {
     55   size_t* hd_ofs = ALLOC(size_t);
     56   *hd_ofs = ofs;
     57   upb_handlers_addcleanup(h, hd_ofs, free);
     58   return hd_ofs;
     59 }
     60 
     61 typedef struct {
     62   size_t ofs;
     63   const upb_msgdef *md;
     64 } submsg_handlerdata_t;
     65 
     66 // Creates a handlerdata that contains offset and submessage type information.
     67 static const void *newsubmsghandlerdata(upb_handlers* h, uint32_t ofs,
     68                                         const upb_fielddef* f) {
     69   submsg_handlerdata_t *hd = ALLOC(submsg_handlerdata_t);
     70   hd->ofs = ofs;
     71   hd->md = upb_fielddef_msgsubdef(f);
     72   upb_handlers_addcleanup(h, hd, free);
     73   return hd;
     74 }
     75 
     76 typedef struct {
     77   size_t ofs;              // union data slot
     78   size_t case_ofs;         // oneof_case field
     79   uint32_t oneof_case_num; // oneof-case number to place in oneof_case field
     80   const upb_msgdef *md;    // msgdef, for oneof submessage handler
     81 } oneof_handlerdata_t;
     82 
     83 static const void *newoneofhandlerdata(upb_handlers *h,
     84                                        uint32_t ofs,
     85                                        uint32_t case_ofs,
     86                                        const upb_fielddef *f) {
     87   oneof_handlerdata_t *hd = ALLOC(oneof_handlerdata_t);
     88   hd->ofs = ofs;
     89   hd->case_ofs = case_ofs;
     90   // We reuse the field tag number as a oneof union discriminant tag. Note that
     91   // we don't expose these numbers to the user, so the only requirement is that
     92   // we have some unique ID for each union case/possibility. The field tag
     93   // numbers are already present and are easy to use so there's no reason to
     94   // create a separate ID space. In addition, using the field tag number here
     95   // lets us easily look up the field in the oneof accessor.
     96   hd->oneof_case_num = upb_fielddef_number(f);
     97   if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE) {
     98     hd->md = upb_fielddef_msgsubdef(f);
     99   } else {
    100     hd->md = NULL;
    101   }
    102   upb_handlers_addcleanup(h, hd, free);
    103   return hd;
    104 }
    105 
    106 // A handler that starts a repeated field.  Gets the Repeated*Field instance for
    107 // this field (such an instance always exists even in an empty message).
    108 static void *startseq_handler(void* closure, const void* hd) {
    109   MessageHeader* msg = closure;
    110   const size_t *ofs = hd;
    111   return (void*)DEREF(msg, *ofs, VALUE);
    112 }
    113 
    114 // Handlers that append primitive values to a repeated field.
    115 #define DEFINE_APPEND_HANDLER(type, ctype)                 \
    116   static bool append##type##_handler(void *closure, const void *hd, \
    117                                      ctype val) {                   \
    118     VALUE ary = (VALUE)closure;                                     \
    119     RepeatedField_push_native(ary, &val);                           \
    120     return true;                                                    \
    121   }
    122 
    123 DEFINE_APPEND_HANDLER(bool,   bool)
    124 DEFINE_APPEND_HANDLER(int32,  int32_t)
    125 DEFINE_APPEND_HANDLER(uint32, uint32_t)
    126 DEFINE_APPEND_HANDLER(float,  float)
    127 DEFINE_APPEND_HANDLER(int64,  int64_t)
    128 DEFINE_APPEND_HANDLER(uint64, uint64_t)
    129 DEFINE_APPEND_HANDLER(double, double)
    130 
    131 // Appends a string to a repeated field.
    132 static void* appendstr_handler(void *closure,
    133                                const void *hd,
    134                                size_t size_hint) {
    135   VALUE ary = (VALUE)closure;
    136   VALUE str = rb_str_new2("");
    137   rb_enc_associate(str, kRubyStringUtf8Encoding);
    138   RepeatedField_push(ary, str);
    139   return (void*)str;
    140 }
    141 
    142 // Appends a 'bytes' string to a repeated field.
    143 static void* appendbytes_handler(void *closure,
    144                                  const void *hd,
    145                                  size_t size_hint) {
    146   VALUE ary = (VALUE)closure;
    147   VALUE str = rb_str_new2("");
    148   rb_enc_associate(str, kRubyString8bitEncoding);
    149   RepeatedField_push(ary, str);
    150   return (void*)str;
    151 }
    152 
    153 // Sets a non-repeated string field in a message.
    154 static void* str_handler(void *closure,
    155                          const void *hd,
    156                          size_t size_hint) {
    157   MessageHeader* msg = closure;
    158   const size_t *ofs = hd;
    159   VALUE str = rb_str_new2("");
    160   rb_enc_associate(str, kRubyStringUtf8Encoding);
    161   DEREF(msg, *ofs, VALUE) = str;
    162   return (void*)str;
    163 }
    164 
    165 // Sets a non-repeated 'bytes' field in a message.
    166 static void* bytes_handler(void *closure,
    167                            const void *hd,
    168                            size_t size_hint) {
    169   MessageHeader* msg = closure;
    170   const size_t *ofs = hd;
    171   VALUE str = rb_str_new2("");
    172   rb_enc_associate(str, kRubyString8bitEncoding);
    173   DEREF(msg, *ofs, VALUE) = str;
    174   return (void*)str;
    175 }
    176 
    177 static size_t stringdata_handler(void* closure, const void* hd,
    178                                  const char* str, size_t len,
    179                                  const upb_bufhandle* handle) {
    180   VALUE rb_str = (VALUE)closure;
    181   noleak_rb_str_cat(rb_str, str, len);
    182   return len;
    183 }
    184 
    185 // Appends a submessage to a repeated field (a regular Ruby array for now).
    186 static void *appendsubmsg_handler(void *closure, const void *hd) {
    187   VALUE ary = (VALUE)closure;
    188   const submsg_handlerdata_t *submsgdata = hd;
    189   VALUE subdesc =
    190       get_def_obj((void*)submsgdata->md);
    191   VALUE subklass = Descriptor_msgclass(subdesc);
    192   MessageHeader* submsg;
    193 
    194   VALUE submsg_rb = rb_class_new_instance(0, NULL, subklass);
    195   RepeatedField_push(ary, submsg_rb);
    196 
    197   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
    198   return submsg;
    199 }
    200 
    201 // Sets a non-repeated submessage field in a message.
    202 static void *submsg_handler(void *closure, const void *hd) {
    203   MessageHeader* msg = closure;
    204   const submsg_handlerdata_t* submsgdata = hd;
    205   VALUE subdesc =
    206       get_def_obj((void*)submsgdata->md);
    207   VALUE subklass = Descriptor_msgclass(subdesc);
    208   VALUE submsg_rb;
    209   MessageHeader* submsg;
    210 
    211   if (DEREF(msg, submsgdata->ofs, VALUE) == Qnil) {
    212     DEREF(msg, submsgdata->ofs, VALUE) =
    213         rb_class_new_instance(0, NULL, subklass);
    214   }
    215 
    216   submsg_rb = DEREF(msg, submsgdata->ofs, VALUE);
    217   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
    218   return submsg;
    219 }
    220 
    221 // Handler data for startmap/endmap handlers.
    222 typedef struct {
    223   size_t ofs;
    224   upb_fieldtype_t key_field_type;
    225   upb_fieldtype_t value_field_type;
    226 
    227   // We know that we can hold this reference because the handlerdata has the
    228   // same lifetime as the upb_handlers struct, and the upb_handlers struct holds
    229   // a reference to the upb_msgdef, which in turn has references to its subdefs.
    230   const upb_def* value_field_subdef;
    231 } map_handlerdata_t;
    232 
    233 // Temporary frame for map parsing: at the beginning of a map entry message, a
    234 // submsg handler allocates a frame to hold (i) a reference to the Map object
    235 // into which this message will be inserted and (ii) storage slots to
    236 // temporarily hold the key and value for this map entry until the end of the
    237 // submessage. When the submessage ends, another handler is called to insert the
    238 // value into the map.
    239 typedef struct {
    240   VALUE map;
    241   char key_storage[NATIVE_SLOT_MAX_SIZE];
    242   char value_storage[NATIVE_SLOT_MAX_SIZE];
    243 } map_parse_frame_t;
    244 
    245 // Handler to begin a map entry: allocates a temporary frame. This is the
    246 // 'startsubmsg' handler on the msgdef that contains the map field.
    247 static void *startmapentry_handler(void *closure, const void *hd) {
    248   MessageHeader* msg = closure;
    249   const map_handlerdata_t* mapdata = hd;
    250   VALUE map_rb = DEREF(msg, mapdata->ofs, VALUE);
    251 
    252   map_parse_frame_t* frame = ALLOC(map_parse_frame_t);
    253   frame->map = map_rb;
    254 
    255   native_slot_init(mapdata->key_field_type, &frame->key_storage);
    256   native_slot_init(mapdata->value_field_type, &frame->value_storage);
    257 
    258   return frame;
    259 }
    260 
    261 // Handler to end a map entry: inserts the value defined during the message into
    262 // the map. This is the 'endmsg' handler on the map entry msgdef.
    263 static bool endmap_handler(void *closure, const void *hd, upb_status* s) {
    264   map_parse_frame_t* frame = closure;
    265   const map_handlerdata_t* mapdata = hd;
    266 
    267   VALUE key = native_slot_get(
    268       mapdata->key_field_type, Qnil,
    269       &frame->key_storage);
    270 
    271   VALUE value_field_typeclass = Qnil;
    272   VALUE value;
    273 
    274   if (mapdata->value_field_type == UPB_TYPE_MESSAGE ||
    275       mapdata->value_field_type == UPB_TYPE_ENUM) {
    276     value_field_typeclass = get_def_obj(mapdata->value_field_subdef);
    277   }
    278 
    279   value = native_slot_get(
    280       mapdata->value_field_type, value_field_typeclass,
    281       &frame->value_storage);
    282 
    283   Map_index_set(frame->map, key, value);
    284   free(frame);
    285 
    286   return true;
    287 }
    288 
    289 // Allocates a new map_handlerdata_t given the map entry message definition. If
    290 // the offset of the field within the parent message is also given, that is
    291 // added to the handler data as well. Note that this is called *twice* per map
    292 // field: once in the parent message handler setup when setting the startsubmsg
    293 // handler and once in the map entry message handler setup when setting the
    294 // key/value and endmsg handlers. The reason is that there is no easy way to
    295 // pass the handlerdata down to the sub-message handler setup.
    296 static map_handlerdata_t* new_map_handlerdata(
    297     size_t ofs,
    298     const upb_msgdef* mapentry_def,
    299     Descriptor* desc) {
    300   const upb_fielddef* key_field;
    301   const upb_fielddef* value_field;
    302   map_handlerdata_t* hd = ALLOC(map_handlerdata_t);
    303   hd->ofs = ofs;
    304   key_field = upb_msgdef_itof(mapentry_def, MAP_KEY_FIELD);
    305   assert(key_field != NULL);
    306   hd->key_field_type = upb_fielddef_type(key_field);
    307   value_field = upb_msgdef_itof(mapentry_def, MAP_VALUE_FIELD);
    308   assert(value_field != NULL);
    309   hd->value_field_type = upb_fielddef_type(value_field);
    310   hd->value_field_subdef = upb_fielddef_subdef(value_field);
    311 
    312   return hd;
    313 }
    314 
    315 // Handlers that set primitive values in oneofs.
    316 #define DEFINE_ONEOF_HANDLER(type, ctype)                           \
    317   static bool oneof##type##_handler(void *closure, const void *hd,  \
    318                                      ctype val) {                   \
    319     const oneof_handlerdata_t *oneofdata = hd;                      \
    320     DEREF(closure, oneofdata->case_ofs, uint32_t) =                 \
    321         oneofdata->oneof_case_num;                                  \
    322     DEREF(closure, oneofdata->ofs, ctype) = val;                    \
    323     return true;                                                    \
    324   }
    325 
    326 DEFINE_ONEOF_HANDLER(bool,   bool)
    327 DEFINE_ONEOF_HANDLER(int32,  int32_t)
    328 DEFINE_ONEOF_HANDLER(uint32, uint32_t)
    329 DEFINE_ONEOF_HANDLER(float,  float)
    330 DEFINE_ONEOF_HANDLER(int64,  int64_t)
    331 DEFINE_ONEOF_HANDLER(uint64, uint64_t)
    332 DEFINE_ONEOF_HANDLER(double, double)
    333 
    334 #undef DEFINE_ONEOF_HANDLER
    335 
    336 // Handlers for strings in a oneof.
    337 static void *oneofstr_handler(void *closure,
    338                               const void *hd,
    339                               size_t size_hint) {
    340   MessageHeader* msg = closure;
    341   const oneof_handlerdata_t *oneofdata = hd;
    342   VALUE str = rb_str_new2("");
    343   rb_enc_associate(str, kRubyStringUtf8Encoding);
    344   DEREF(msg, oneofdata->case_ofs, uint32_t) =
    345       oneofdata->oneof_case_num;
    346   DEREF(msg, oneofdata->ofs, VALUE) = str;
    347   return (void*)str;
    348 }
    349 
    350 static void *oneofbytes_handler(void *closure,
    351                                 const void *hd,
    352                                 size_t size_hint) {
    353   MessageHeader* msg = closure;
    354   const oneof_handlerdata_t *oneofdata = hd;
    355   VALUE str = rb_str_new2("");
    356   rb_enc_associate(str, kRubyString8bitEncoding);
    357   DEREF(msg, oneofdata->case_ofs, uint32_t) =
    358       oneofdata->oneof_case_num;
    359   DEREF(msg, oneofdata->ofs, VALUE) = str;
    360   return (void*)str;
    361 }
    362 
    363 // Handler for a submessage field in a oneof.
    364 static void *oneofsubmsg_handler(void *closure,
    365                                  const void *hd) {
    366   MessageHeader* msg = closure;
    367   const oneof_handlerdata_t *oneofdata = hd;
    368   uint32_t oldcase = DEREF(msg, oneofdata->case_ofs, uint32_t);
    369 
    370   VALUE subdesc =
    371       get_def_obj((void*)oneofdata->md);
    372   VALUE subklass = Descriptor_msgclass(subdesc);
    373   VALUE submsg_rb;
    374   MessageHeader* submsg;
    375 
    376   if (oldcase != oneofdata->oneof_case_num ||
    377       DEREF(msg, oneofdata->ofs, VALUE) == Qnil) {
    378     DEREF(msg, oneofdata->ofs, VALUE) =
    379         rb_class_new_instance(0, NULL, subklass);
    380   }
    381   // Set the oneof case *after* allocating the new class instance -- otherwise,
    382   // if the Ruby GC is invoked as part of a call into the VM, it might invoke
    383   // our mark routines, and our mark routines might see the case value
    384   // indicating a VALUE is present and expect a valid VALUE. See comment in
    385   // layout_set() for more detail: basically, the change to the value and the
    386   // case must be atomic w.r.t. the Ruby VM.
    387   DEREF(msg, oneofdata->case_ofs, uint32_t) =
    388       oneofdata->oneof_case_num;
    389 
    390   submsg_rb = DEREF(msg, oneofdata->ofs, VALUE);
    391   TypedData_Get_Struct(submsg_rb, MessageHeader, &Message_type, submsg);
    392   return submsg;
    393 }
    394 
    395 // Set up handlers for a repeated field.
    396 static void add_handlers_for_repeated_field(upb_handlers *h,
    397                                             const upb_fielddef *f,
    398                                             size_t offset) {
    399   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    400   upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
    401   upb_handlers_setstartseq(h, f, startseq_handler, &attr);
    402   upb_handlerattr_uninit(&attr);
    403 
    404   switch (upb_fielddef_type(f)) {
    405 
    406 #define SET_HANDLER(utype, ltype)                                 \
    407   case utype:                                                     \
    408     upb_handlers_set##ltype(h, f, append##ltype##_handler, NULL); \
    409     break;
    410 
    411     SET_HANDLER(UPB_TYPE_BOOL,   bool);
    412     SET_HANDLER(UPB_TYPE_INT32,  int32);
    413     SET_HANDLER(UPB_TYPE_UINT32, uint32);
    414     SET_HANDLER(UPB_TYPE_ENUM,   int32);
    415     SET_HANDLER(UPB_TYPE_FLOAT,  float);
    416     SET_HANDLER(UPB_TYPE_INT64,  int64);
    417     SET_HANDLER(UPB_TYPE_UINT64, uint64);
    418     SET_HANDLER(UPB_TYPE_DOUBLE, double);
    419 
    420 #undef SET_HANDLER
    421 
    422     case UPB_TYPE_STRING:
    423     case UPB_TYPE_BYTES: {
    424       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
    425       upb_handlers_setstartstr(h, f, is_bytes ?
    426                                appendbytes_handler : appendstr_handler,
    427                                NULL);
    428       upb_handlers_setstring(h, f, stringdata_handler, NULL);
    429       break;
    430     }
    431     case UPB_TYPE_MESSAGE: {
    432       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    433       upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, 0, f));
    434       upb_handlers_setstartsubmsg(h, f, appendsubmsg_handler, &attr);
    435       upb_handlerattr_uninit(&attr);
    436       break;
    437     }
    438   }
    439 }
    440 
    441 // Set up handlers for a singular field.
    442 static void add_handlers_for_singular_field(upb_handlers *h,
    443                                             const upb_fielddef *f,
    444                                             size_t offset) {
    445   switch (upb_fielddef_type(f)) {
    446     case UPB_TYPE_BOOL:
    447     case UPB_TYPE_INT32:
    448     case UPB_TYPE_UINT32:
    449     case UPB_TYPE_ENUM:
    450     case UPB_TYPE_FLOAT:
    451     case UPB_TYPE_INT64:
    452     case UPB_TYPE_UINT64:
    453     case UPB_TYPE_DOUBLE:
    454       upb_shim_set(h, f, offset, -1);
    455       break;
    456     case UPB_TYPE_STRING:
    457     case UPB_TYPE_BYTES: {
    458       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
    459       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    460       upb_handlerattr_sethandlerdata(&attr, newhandlerdata(h, offset));
    461       upb_handlers_setstartstr(h, f,
    462                                is_bytes ? bytes_handler : str_handler,
    463                                &attr);
    464       upb_handlers_setstring(h, f, stringdata_handler, &attr);
    465       upb_handlerattr_uninit(&attr);
    466       break;
    467     }
    468     case UPB_TYPE_MESSAGE: {
    469       upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    470       upb_handlerattr_sethandlerdata(&attr, newsubmsghandlerdata(h, offset, f));
    471       upb_handlers_setstartsubmsg(h, f, submsg_handler, &attr);
    472       upb_handlerattr_uninit(&attr);
    473       break;
    474     }
    475   }
    476 }
    477 
    478 // Adds handlers to a map field.
    479 static void add_handlers_for_mapfield(upb_handlers* h,
    480                                       const upb_fielddef* fielddef,
    481                                       size_t offset,
    482                                       Descriptor* desc) {
    483   const upb_msgdef* map_msgdef = upb_fielddef_msgsubdef(fielddef);
    484   map_handlerdata_t* hd = new_map_handlerdata(offset, map_msgdef, desc);
    485   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    486 
    487   upb_handlers_addcleanup(h, hd, free);
    488   upb_handlerattr_sethandlerdata(&attr, hd);
    489   upb_handlers_setstartsubmsg(h, fielddef, startmapentry_handler, &attr);
    490   upb_handlerattr_uninit(&attr);
    491 }
    492 
    493 // Adds handlers to a map-entry msgdef.
    494 static void add_handlers_for_mapentry(const upb_msgdef* msgdef,
    495                                       upb_handlers* h,
    496                                       Descriptor* desc) {
    497   const upb_fielddef* key_field = map_entry_key(msgdef);
    498   const upb_fielddef* value_field = map_entry_value(msgdef);
    499   map_handlerdata_t* hd = new_map_handlerdata(0, msgdef, desc);
    500   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    501 
    502   upb_handlers_addcleanup(h, hd, free);
    503   upb_handlerattr_sethandlerdata(&attr, hd);
    504   upb_handlers_setendmsg(h, endmap_handler, &attr);
    505 
    506   add_handlers_for_singular_field(
    507       h, key_field,
    508       offsetof(map_parse_frame_t, key_storage));
    509   add_handlers_for_singular_field(
    510       h, value_field,
    511       offsetof(map_parse_frame_t, value_storage));
    512 }
    513 
    514 // Set up handlers for a oneof field.
    515 static void add_handlers_for_oneof_field(upb_handlers *h,
    516                                          const upb_fielddef *f,
    517                                          size_t offset,
    518                                          size_t oneof_case_offset) {
    519 
    520   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
    521   upb_handlerattr_sethandlerdata(
    522       &attr, newoneofhandlerdata(h, offset, oneof_case_offset, f));
    523 
    524   switch (upb_fielddef_type(f)) {
    525 
    526 #define SET_HANDLER(utype, ltype)                                 \
    527   case utype:                                                     \
    528     upb_handlers_set##ltype(h, f, oneof##ltype##_handler, &attr); \
    529     break;
    530 
    531     SET_HANDLER(UPB_TYPE_BOOL,   bool);
    532     SET_HANDLER(UPB_TYPE_INT32,  int32);
    533     SET_HANDLER(UPB_TYPE_UINT32, uint32);
    534     SET_HANDLER(UPB_TYPE_ENUM,   int32);
    535     SET_HANDLER(UPB_TYPE_FLOAT,  float);
    536     SET_HANDLER(UPB_TYPE_INT64,  int64);
    537     SET_HANDLER(UPB_TYPE_UINT64, uint64);
    538     SET_HANDLER(UPB_TYPE_DOUBLE, double);
    539 
    540 #undef SET_HANDLER
    541 
    542     case UPB_TYPE_STRING:
    543     case UPB_TYPE_BYTES: {
    544       bool is_bytes = upb_fielddef_type(f) == UPB_TYPE_BYTES;
    545       upb_handlers_setstartstr(h, f, is_bytes ?
    546                                oneofbytes_handler : oneofstr_handler,
    547                                &attr);
    548       upb_handlers_setstring(h, f, stringdata_handler, NULL);
    549       break;
    550     }
    551     case UPB_TYPE_MESSAGE: {
    552       upb_handlers_setstartsubmsg(h, f, oneofsubmsg_handler, &attr);
    553       break;
    554     }
    555   }
    556 
    557   upb_handlerattr_uninit(&attr);
    558 }
    559 
    560 
    561 static void add_handlers_for_message(const void *closure, upb_handlers *h) {
    562   const upb_msgdef* msgdef = upb_handlers_msgdef(h);
    563   Descriptor* desc = ruby_to_Descriptor(get_def_obj((void*)msgdef));
    564   upb_msg_field_iter i;
    565 
    566   // If this is a mapentry message type, set up a special set of handlers and
    567   // bail out of the normal (user-defined) message type handling.
    568   if (upb_msgdef_mapentry(msgdef)) {
    569     add_handlers_for_mapentry(msgdef, h, desc);
    570     return;
    571   }
    572 
    573   // Ensure layout exists. We may be invoked to create handlers for a given
    574   // message if we are included as a submsg of another message type before our
    575   // class is actually built, so to work around this, we just create the layout
    576   // (and handlers, in the class-building function) on-demand.
    577   if (desc->layout == NULL) {
    578     desc->layout = create_layout(desc->msgdef);
    579   }
    580 
    581   for (upb_msg_field_begin(&i, desc->msgdef);
    582        !upb_msg_field_done(&i);
    583        upb_msg_field_next(&i)) {
    584     const upb_fielddef *f = upb_msg_iter_field(&i);
    585     size_t offset = desc->layout->fields[upb_fielddef_index(f)].offset +
    586         sizeof(MessageHeader);
    587 
    588     if (upb_fielddef_containingoneof(f)) {
    589       size_t oneof_case_offset =
    590           desc->layout->fields[upb_fielddef_index(f)].case_offset +
    591           sizeof(MessageHeader);
    592       add_handlers_for_oneof_field(h, f, offset, oneof_case_offset);
    593     } else if (is_map_field(f)) {
    594       add_handlers_for_mapfield(h, f, offset, desc);
    595     } else if (upb_fielddef_isseq(f)) {
    596       add_handlers_for_repeated_field(h, f, offset);
    597     } else {
    598       add_handlers_for_singular_field(h, f, offset);
    599     }
    600   }
    601 }
    602 
    603 // Creates upb handlers for populating a message.
    604 static const upb_handlers *new_fill_handlers(Descriptor* desc,
    605                                              const void* owner) {
    606   // TODO(cfallin, haberman): once upb gets a caching/memoization layer for
    607   // handlers, reuse subdef handlers so that e.g. if we already parse
    608   // B-with-field-of-type-C, we don't have to rebuild the whole hierarchy to
    609   // parse A-with-field-of-type-B-with-field-of-type-C.
    610   return upb_handlers_newfrozen(desc->msgdef, owner,
    611                                 add_handlers_for_message, NULL);
    612 }
    613 
    614 // Constructs the handlers for filling a message's data into an in-memory
    615 // object.
    616 const upb_handlers* get_fill_handlers(Descriptor* desc) {
    617   if (!desc->fill_handlers) {
    618     desc->fill_handlers =
    619         new_fill_handlers(desc, &desc->fill_handlers);
    620   }
    621   return desc->fill_handlers;
    622 }
    623 
    624 // Constructs the upb decoder method for parsing messages of this type.
    625 // This is called from the message class creation code.
    626 const upb_pbdecodermethod *new_fillmsg_decodermethod(Descriptor* desc,
    627                                                      const void* owner) {
    628   const upb_handlers* handlers = get_fill_handlers(desc);
    629   upb_pbdecodermethodopts opts;
    630   upb_pbdecodermethodopts_init(&opts, handlers);
    631 
    632   return upb_pbdecodermethod_new(&opts, owner);
    633 }
    634 
    635 static const upb_pbdecodermethod *msgdef_decodermethod(Descriptor* desc) {
    636   if (desc->fill_method == NULL) {
    637     desc->fill_method = new_fillmsg_decodermethod(
    638         desc, &desc->fill_method);
    639   }
    640   return desc->fill_method;
    641 }
    642 
    643 static const upb_json_parsermethod *msgdef_jsonparsermethod(Descriptor* desc) {
    644   if (desc->json_fill_method == NULL) {
    645     desc->json_fill_method =
    646         upb_json_parsermethod_new(desc->msgdef, &desc->json_fill_method);
    647   }
    648   return desc->json_fill_method;
    649 }
    650 
    651 
    652 // Stack-allocated context during an encode/decode operation. Contains the upb
    653 // environment and its stack-based allocator, an initial buffer for allocations
    654 // to avoid malloc() when possible, and a template for Ruby exception messages
    655 // if any error occurs.
    656 #define STACK_ENV_STACKBYTES 4096
    657 typedef struct {
    658   upb_env env;
    659   const char* ruby_error_template;
    660   char allocbuf[STACK_ENV_STACKBYTES];
    661 } stackenv;
    662 
    663 static void stackenv_init(stackenv* se, const char* errmsg);
    664 static void stackenv_uninit(stackenv* se);
    665 
    666 // Callback invoked by upb if any error occurs during parsing or serialization.
    667 static bool env_error_func(void* ud, const upb_status* status) {
    668   stackenv* se = ud;
    669   // Free the env -- rb_raise will longjmp up the stack past the encode/decode
    670   // function so it would not otherwise have been freed.
    671   stackenv_uninit(se);
    672 
    673   // TODO(haberman): have a way to verify that this is actually a parse error,
    674   // instead of just throwing "parse error" unconditionally.
    675   rb_raise(cParseError, se->ruby_error_template, upb_status_errmsg(status));
    676   // Never reached: rb_raise() always longjmp()s up the stack, past all of our
    677   // code, back to Ruby.
    678   return false;
    679 }
    680 
    681 static void stackenv_init(stackenv* se, const char* errmsg) {
    682   se->ruby_error_template = errmsg;
    683   upb_env_init2(&se->env, se->allocbuf, sizeof(se->allocbuf), NULL);
    684   upb_env_seterrorfunc(&se->env, env_error_func, se);
    685 }
    686 
    687 static void stackenv_uninit(stackenv* se) {
    688   upb_env_uninit(&se->env);
    689 }
    690 
    691 /*
    692  * call-seq:
    693  *     MessageClass.decode(data) => message
    694  *
    695  * Decodes the given data (as a string containing bytes in protocol buffers wire
    696  * format) under the interpretration given by this message class's definition
    697  * and returns a message object with the corresponding field values.
    698  */
    699 VALUE Message_decode(VALUE klass, VALUE data) {
    700   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
    701   Descriptor* desc = ruby_to_Descriptor(descriptor);
    702   VALUE msgklass = Descriptor_msgclass(descriptor);
    703   VALUE msg_rb;
    704   MessageHeader* msg;
    705 
    706   if (TYPE(data) != T_STRING) {
    707     rb_raise(rb_eArgError, "Expected string for binary protobuf data.");
    708   }
    709 
    710   msg_rb = rb_class_new_instance(0, NULL, msgklass);
    711   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
    712 
    713   {
    714     const upb_pbdecodermethod* method = msgdef_decodermethod(desc);
    715     const upb_handlers* h = upb_pbdecodermethod_desthandlers(method);
    716     stackenv se;
    717     upb_sink sink;
    718     upb_pbdecoder* decoder;
    719     stackenv_init(&se, "Error occurred during parsing: %s");
    720 
    721     upb_sink_reset(&sink, h, msg);
    722     decoder = upb_pbdecoder_create(&se.env, method, &sink);
    723     upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
    724                       upb_pbdecoder_input(decoder));
    725 
    726     stackenv_uninit(&se);
    727   }
    728 
    729   return msg_rb;
    730 }
    731 
    732 /*
    733  * call-seq:
    734  *     MessageClass.decode_json(data) => message
    735  *
    736  * Decodes the given data (as a string containing bytes in protocol buffers wire
    737  * format) under the interpretration given by this message class's definition
    738  * and returns a message object with the corresponding field values.
    739  */
    740 VALUE Message_decode_json(VALUE klass, VALUE data) {
    741   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
    742   Descriptor* desc = ruby_to_Descriptor(descriptor);
    743   VALUE msgklass = Descriptor_msgclass(descriptor);
    744   VALUE msg_rb;
    745   MessageHeader* msg;
    746 
    747   if (TYPE(data) != T_STRING) {
    748     rb_raise(rb_eArgError, "Expected string for JSON data.");
    749   }
    750   // TODO(cfallin): Check and respect string encoding. If not UTF-8, we need to
    751   // convert, because string handlers pass data directly to message string
    752   // fields.
    753 
    754   msg_rb = rb_class_new_instance(0, NULL, msgklass);
    755   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
    756 
    757   {
    758     const upb_json_parsermethod* method = msgdef_jsonparsermethod(desc);
    759     stackenv se;
    760     upb_sink sink;
    761     upb_json_parser* parser;
    762     stackenv_init(&se, "Error occurred during parsing: %s");
    763 
    764     upb_sink_reset(&sink, get_fill_handlers(desc), msg);
    765     parser = upb_json_parser_create(&se.env, method, &sink);
    766     upb_bufsrc_putbuf(RSTRING_PTR(data), RSTRING_LEN(data),
    767                       upb_json_parser_input(parser));
    768 
    769     stackenv_uninit(&se);
    770   }
    771 
    772   return msg_rb;
    773 }
    774 
    775 // -----------------------------------------------------------------------------
    776 // Serializing.
    777 // -----------------------------------------------------------------------------
    778 //
    779 // The code below also comes from upb's prototype Ruby binding, developed by
    780 // haberman@.
    781 
    782 /* stringsink *****************************************************************/
    783 
    784 // This should probably be factored into a common upb component.
    785 
    786 typedef struct {
    787   upb_byteshandler handler;
    788   upb_bytessink sink;
    789   char *ptr;
    790   size_t len, size;
    791 } stringsink;
    792 
    793 static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
    794   stringsink *sink = _sink;
    795   sink->len = 0;
    796   return sink;
    797 }
    798 
    799 static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
    800                                 size_t len, const upb_bufhandle *handle) {
    801   stringsink *sink = _sink;
    802   size_t new_size = sink->size;
    803 
    804   UPB_UNUSED(hd);
    805   UPB_UNUSED(handle);
    806 
    807   while (sink->len + len > new_size) {
    808     new_size *= 2;
    809   }
    810 
    811   if (new_size != sink->size) {
    812     sink->ptr = realloc(sink->ptr, new_size);
    813     sink->size = new_size;
    814   }
    815 
    816   memcpy(sink->ptr + sink->len, ptr, len);
    817   sink->len += len;
    818 
    819   return len;
    820 }
    821 
    822 void stringsink_init(stringsink *sink) {
    823   upb_byteshandler_init(&sink->handler);
    824   upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
    825   upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
    826 
    827   upb_bytessink_reset(&sink->sink, &sink->handler, sink);
    828 
    829   sink->size = 32;
    830   sink->ptr = malloc(sink->size);
    831   sink->len = 0;
    832 }
    833 
    834 void stringsink_uninit(stringsink *sink) {
    835   free(sink->ptr);
    836 }
    837 
    838 /* msgvisitor *****************************************************************/
    839 
    840 // TODO: If/when we support proto2 semantics in addition to the current proto3
    841 // semantics, which means that we have true field presence, we will want to
    842 // modify msgvisitor so that it emits all present fields rather than all
    843 // non-default-value fields.
    844 //
    845 // Likewise, when implementing JSON serialization, we may need to have a
    846 // 'verbose' mode that outputs all fields and a 'concise' mode that outputs only
    847 // those with non-default values.
    848 
    849 static void putmsg(VALUE msg, const Descriptor* desc,
    850                    upb_sink *sink, int depth);
    851 
    852 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
    853   upb_selector_t ret;
    854   bool ok = upb_handlers_getselector(f, type, &ret);
    855   UPB_ASSERT_VAR(ok, ok);
    856   return ret;
    857 }
    858 
    859 static void putstr(VALUE str, const upb_fielddef *f, upb_sink *sink) {
    860   upb_sink subsink;
    861 
    862   if (str == Qnil) return;
    863 
    864   assert(BUILTIN_TYPE(str) == RUBY_T_STRING);
    865 
    866   // Ensure that the string has the correct encoding. We also check at field-set
    867   // time, but the user may have mutated the string object since then.
    868   native_slot_validate_string_encoding(upb_fielddef_type(f), str);
    869 
    870   upb_sink_startstr(sink, getsel(f, UPB_HANDLER_STARTSTR), RSTRING_LEN(str),
    871                     &subsink);
    872   upb_sink_putstring(&subsink, getsel(f, UPB_HANDLER_STRING), RSTRING_PTR(str),
    873                      RSTRING_LEN(str), NULL);
    874   upb_sink_endstr(sink, getsel(f, UPB_HANDLER_ENDSTR));
    875 }
    876 
    877 static void putsubmsg(VALUE submsg, const upb_fielddef *f, upb_sink *sink,
    878                       int depth) {
    879   upb_sink subsink;
    880   VALUE descriptor;
    881   Descriptor* subdesc;
    882 
    883   if (submsg == Qnil) return;
    884 
    885   descriptor = rb_ivar_get(submsg, descriptor_instancevar_interned);
    886   subdesc = ruby_to_Descriptor(descriptor);
    887 
    888   upb_sink_startsubmsg(sink, getsel(f, UPB_HANDLER_STARTSUBMSG), &subsink);
    889   putmsg(submsg, subdesc, &subsink, depth + 1);
    890   upb_sink_endsubmsg(sink, getsel(f, UPB_HANDLER_ENDSUBMSG));
    891 }
    892 
    893 static void putary(VALUE ary, const upb_fielddef *f, upb_sink *sink,
    894                    int depth) {
    895   upb_sink subsink;
    896   upb_fieldtype_t type = upb_fielddef_type(f);
    897   upb_selector_t sel = 0;
    898   int size;
    899 
    900   if (ary == Qnil) return;
    901 
    902   upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
    903 
    904   if (upb_fielddef_isprimitive(f)) {
    905     sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
    906   }
    907 
    908   size = NUM2INT(RepeatedField_length(ary));
    909   for (int i = 0; i < size; i++) {
    910     void* memory = RepeatedField_index_native(ary, i);
    911     switch (type) {
    912 #define T(upbtypeconst, upbtype, ctype)                         \
    913   case upbtypeconst:                                            \
    914     upb_sink_put##upbtype(&subsink, sel, *((ctype *)memory));   \
    915     break;
    916 
    917       T(UPB_TYPE_FLOAT,  float,  float)
    918       T(UPB_TYPE_DOUBLE, double, double)
    919       T(UPB_TYPE_BOOL,   bool,   int8_t)
    920       case UPB_TYPE_ENUM:
    921       T(UPB_TYPE_INT32,  int32,  int32_t)
    922       T(UPB_TYPE_UINT32, uint32, uint32_t)
    923       T(UPB_TYPE_INT64,  int64,  int64_t)
    924       T(UPB_TYPE_UINT64, uint64, uint64_t)
    925 
    926       case UPB_TYPE_STRING:
    927       case UPB_TYPE_BYTES:
    928         putstr(*((VALUE *)memory), f, &subsink);
    929         break;
    930       case UPB_TYPE_MESSAGE:
    931         putsubmsg(*((VALUE *)memory), f, &subsink, depth);
    932         break;
    933 
    934 #undef T
    935 
    936     }
    937   }
    938   upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
    939 }
    940 
    941 static void put_ruby_value(VALUE value,
    942                            const upb_fielddef *f,
    943                            VALUE type_class,
    944                            int depth,
    945                            upb_sink *sink) {
    946   upb_selector_t sel = 0;
    947   if (upb_fielddef_isprimitive(f)) {
    948     sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
    949   }
    950 
    951   switch (upb_fielddef_type(f)) {
    952     case UPB_TYPE_INT32:
    953       upb_sink_putint32(sink, sel, NUM2INT(value));
    954       break;
    955     case UPB_TYPE_INT64:
    956       upb_sink_putint64(sink, sel, NUM2LL(value));
    957       break;
    958     case UPB_TYPE_UINT32:
    959       upb_sink_putuint32(sink, sel, NUM2UINT(value));
    960       break;
    961     case UPB_TYPE_UINT64:
    962       upb_sink_putuint64(sink, sel, NUM2ULL(value));
    963       break;
    964     case UPB_TYPE_FLOAT:
    965       upb_sink_putfloat(sink, sel, NUM2DBL(value));
    966       break;
    967     case UPB_TYPE_DOUBLE:
    968       upb_sink_putdouble(sink, sel, NUM2DBL(value));
    969       break;
    970     case UPB_TYPE_ENUM: {
    971       if (TYPE(value) == T_SYMBOL) {
    972         value = rb_funcall(type_class, rb_intern("resolve"), 1, value);
    973       }
    974       upb_sink_putint32(sink, sel, NUM2INT(value));
    975       break;
    976     }
    977     case UPB_TYPE_BOOL:
    978       upb_sink_putbool(sink, sel, value == Qtrue);
    979       break;
    980     case UPB_TYPE_STRING:
    981     case UPB_TYPE_BYTES:
    982       putstr(value, f, sink);
    983       break;
    984     case UPB_TYPE_MESSAGE:
    985       putsubmsg(value, f, sink, depth);
    986   }
    987 }
    988 
    989 static void putmap(VALUE map, const upb_fielddef *f, upb_sink *sink,
    990                    int depth) {
    991   Map* self;
    992   upb_sink subsink;
    993   const upb_fielddef* key_field;
    994   const upb_fielddef* value_field;
    995   Map_iter it;
    996 
    997   if (map == Qnil) return;
    998   self = ruby_to_Map(map);
    999 
   1000   upb_sink_startseq(sink, getsel(f, UPB_HANDLER_STARTSEQ), &subsink);
   1001 
   1002   assert(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
   1003   key_field = map_field_key(f);
   1004   value_field = map_field_value(f);
   1005 
   1006   for (Map_begin(map, &it); !Map_done(&it); Map_next(&it)) {
   1007     VALUE key = Map_iter_key(&it);
   1008     VALUE value = Map_iter_value(&it);
   1009     upb_status status;
   1010 
   1011     upb_sink entry_sink;
   1012     upb_sink_startsubmsg(&subsink, getsel(f, UPB_HANDLER_STARTSUBMSG),
   1013                          &entry_sink);
   1014     upb_sink_startmsg(&entry_sink);
   1015 
   1016     put_ruby_value(key, key_field, Qnil, depth + 1, &entry_sink);
   1017     put_ruby_value(value, value_field, self->value_type_class, depth + 1,
   1018                    &entry_sink);
   1019 
   1020     upb_sink_endmsg(&entry_sink, &status);
   1021     upb_sink_endsubmsg(&subsink, getsel(f, UPB_HANDLER_ENDSUBMSG));
   1022   }
   1023 
   1024   upb_sink_endseq(sink, getsel(f, UPB_HANDLER_ENDSEQ));
   1025 }
   1026 
   1027 static void putmsg(VALUE msg_rb, const Descriptor* desc,
   1028                    upb_sink *sink, int depth) {
   1029   MessageHeader* msg;
   1030   upb_msg_field_iter i;
   1031   upb_status status;
   1032 
   1033   upb_sink_startmsg(sink);
   1034 
   1035   // Protect against cycles (possible because users may freely reassign message
   1036   // and repeated fields) by imposing a maximum recursion depth.
   1037   if (depth > ENCODE_MAX_NESTING) {
   1038     rb_raise(rb_eRuntimeError,
   1039              "Maximum recursion depth exceeded during encoding.");
   1040   }
   1041 
   1042   TypedData_Get_Struct(msg_rb, MessageHeader, &Message_type, msg);
   1043 
   1044   for (upb_msg_field_begin(&i, desc->msgdef);
   1045        !upb_msg_field_done(&i);
   1046        upb_msg_field_next(&i)) {
   1047     upb_fielddef *f = upb_msg_iter_field(&i);
   1048     bool is_matching_oneof = false;
   1049     uint32_t offset =
   1050         desc->layout->fields[upb_fielddef_index(f)].offset +
   1051         sizeof(MessageHeader);
   1052 
   1053     if (upb_fielddef_containingoneof(f)) {
   1054       uint32_t oneof_case_offset =
   1055           desc->layout->fields[upb_fielddef_index(f)].case_offset +
   1056           sizeof(MessageHeader);
   1057       // For a oneof, check that this field is actually present -- skip all the
   1058       // below if not.
   1059       if (DEREF(msg, oneof_case_offset, uint32_t) !=
   1060           upb_fielddef_number(f)) {
   1061         continue;
   1062       }
   1063       // Otherwise, fall through to the appropriate singular-field handler
   1064       // below.
   1065       is_matching_oneof = true;
   1066     }
   1067 
   1068     if (is_map_field(f)) {
   1069       VALUE map = DEREF(msg, offset, VALUE);
   1070       if (map != Qnil) {
   1071         putmap(map, f, sink, depth);
   1072       }
   1073     } else if (upb_fielddef_isseq(f)) {
   1074       VALUE ary = DEREF(msg, offset, VALUE);
   1075       if (ary != Qnil) {
   1076         putary(ary, f, sink, depth);
   1077       }
   1078     } else if (upb_fielddef_isstring(f)) {
   1079       VALUE str = DEREF(msg, offset, VALUE);
   1080       if (is_matching_oneof || RSTRING_LEN(str) > 0) {
   1081         putstr(str, f, sink);
   1082       }
   1083     } else if (upb_fielddef_issubmsg(f)) {
   1084       putsubmsg(DEREF(msg, offset, VALUE), f, sink, depth);
   1085     } else {
   1086       upb_selector_t sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
   1087 
   1088 #define T(upbtypeconst, upbtype, ctype, default_value)                \
   1089   case upbtypeconst: {                                                \
   1090       ctype value = DEREF(msg, offset, ctype);                        \
   1091       if (is_matching_oneof || value != default_value) {              \
   1092         upb_sink_put##upbtype(sink, sel, value);                      \
   1093       }                                                               \
   1094     }                                                                 \
   1095     break;
   1096 
   1097       switch (upb_fielddef_type(f)) {
   1098         T(UPB_TYPE_FLOAT,  float,  float, 0.0)
   1099         T(UPB_TYPE_DOUBLE, double, double, 0.0)
   1100         T(UPB_TYPE_BOOL,   bool,   uint8_t, 0)
   1101         case UPB_TYPE_ENUM:
   1102         T(UPB_TYPE_INT32,  int32,  int32_t, 0)
   1103         T(UPB_TYPE_UINT32, uint32, uint32_t, 0)
   1104         T(UPB_TYPE_INT64,  int64,  int64_t, 0)
   1105         T(UPB_TYPE_UINT64, uint64, uint64_t, 0)
   1106 
   1107         case UPB_TYPE_STRING:
   1108         case UPB_TYPE_BYTES:
   1109         case UPB_TYPE_MESSAGE: rb_raise(rb_eRuntimeError, "Internal error.");
   1110       }
   1111 
   1112 #undef T
   1113 
   1114     }
   1115   }
   1116 
   1117   upb_sink_endmsg(sink, &status);
   1118 }
   1119 
   1120 static const upb_handlers* msgdef_pb_serialize_handlers(Descriptor* desc) {
   1121   if (desc->pb_serialize_handlers == NULL) {
   1122     desc->pb_serialize_handlers =
   1123         upb_pb_encoder_newhandlers(desc->msgdef, &desc->pb_serialize_handlers);
   1124   }
   1125   return desc->pb_serialize_handlers;
   1126 }
   1127 
   1128 static const upb_handlers* msgdef_json_serialize_handlers(
   1129     Descriptor* desc, bool preserve_proto_fieldnames) {
   1130   if (preserve_proto_fieldnames) {
   1131     if (desc->json_serialize_handlers == NULL) {
   1132       desc->json_serialize_handlers =
   1133           upb_json_printer_newhandlers(
   1134               desc->msgdef, true, &desc->json_serialize_handlers);
   1135     }
   1136     return desc->json_serialize_handlers;
   1137   } else {
   1138     if (desc->json_serialize_handlers_preserve == NULL) {
   1139       desc->json_serialize_handlers_preserve =
   1140           upb_json_printer_newhandlers(
   1141               desc->msgdef, false, &desc->json_serialize_handlers_preserve);
   1142     }
   1143     return desc->json_serialize_handlers_preserve;
   1144   }
   1145 }
   1146 
   1147 /*
   1148  * call-seq:
   1149  *     MessageClass.encode(msg) => bytes
   1150  *
   1151  * Encodes the given message object to its serialized form in protocol buffers
   1152  * wire format.
   1153  */
   1154 VALUE Message_encode(VALUE klass, VALUE msg_rb) {
   1155   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
   1156   Descriptor* desc = ruby_to_Descriptor(descriptor);
   1157 
   1158   stringsink sink;
   1159   stringsink_init(&sink);
   1160 
   1161   {
   1162     const upb_handlers* serialize_handlers =
   1163         msgdef_pb_serialize_handlers(desc);
   1164 
   1165     stackenv se;
   1166     upb_pb_encoder* encoder;
   1167     VALUE ret;
   1168 
   1169     stackenv_init(&se, "Error occurred during encoding: %s");
   1170     encoder = upb_pb_encoder_create(&se.env, serialize_handlers, &sink.sink);
   1171 
   1172     putmsg(msg_rb, desc, upb_pb_encoder_input(encoder), 0);
   1173 
   1174     ret = rb_str_new(sink.ptr, sink.len);
   1175 
   1176     stackenv_uninit(&se);
   1177     stringsink_uninit(&sink);
   1178 
   1179     return ret;
   1180   }
   1181 }
   1182 
   1183 /*
   1184  * call-seq:
   1185  *     MessageClass.encode_json(msg) => json_string
   1186  *
   1187  * Encodes the given message object into its serialized JSON representation.
   1188  */
   1189 VALUE Message_encode_json(int argc, VALUE* argv, VALUE klass) {
   1190   VALUE descriptor = rb_ivar_get(klass, descriptor_instancevar_interned);
   1191   Descriptor* desc = ruby_to_Descriptor(descriptor);
   1192   VALUE msg_rb;
   1193   VALUE preserve_proto_fieldnames = Qfalse;
   1194   stringsink sink;
   1195 
   1196   if (argc < 1 || argc > 2) {
   1197     rb_raise(rb_eArgError, "Expected 1 or 2 arguments.");
   1198   }
   1199 
   1200   msg_rb = argv[0];
   1201 
   1202   if (argc == 2) {
   1203     VALUE hash_args = argv[1];
   1204     if (TYPE(hash_args) != T_HASH) {
   1205       rb_raise(rb_eArgError, "Expected hash arguments.");
   1206     }
   1207     preserve_proto_fieldnames = rb_hash_lookup2(
   1208         hash_args, ID2SYM(rb_intern("preserve_proto_fieldnames")), Qfalse);
   1209   }
   1210 
   1211   stringsink_init(&sink);
   1212 
   1213   {
   1214     const upb_handlers* serialize_handlers =
   1215         msgdef_json_serialize_handlers(desc, RTEST(preserve_proto_fieldnames));
   1216     upb_json_printer* printer;
   1217     stackenv se;
   1218     VALUE ret;
   1219 
   1220     stackenv_init(&se, "Error occurred during encoding: %s");
   1221     printer = upb_json_printer_create(&se.env, serialize_handlers, &sink.sink);
   1222 
   1223     putmsg(msg_rb, desc, upb_json_printer_input(printer), 0);
   1224 
   1225     ret = rb_enc_str_new(sink.ptr, sink.len, rb_utf8_encoding());
   1226 
   1227     stackenv_uninit(&se);
   1228     stringsink_uninit(&sink);
   1229 
   1230     return ret;
   1231   }
   1232 }
   1233 
   1234