Home | History | Annotate | Download | only in protobuf
      1 // Amalgamated source file
      2 #include "upb.h"
      3 
      4 
      5 #include <stdlib.h>
      6 #include <string.h>
      7 
      8 typedef struct {
      9   size_t len;
     10   char str[1];  /* Null-terminated string data follows. */
     11 } str_t;
     12 
     13 static str_t *newstr(const char *data, size_t len) {
     14   str_t *ret = malloc(sizeof(*ret) + len);
     15   if (!ret) return NULL;
     16   ret->len = len;
     17   memcpy(ret->str, data, len);
     18   ret->str[len] = '\0';
     19   return ret;
     20 }
     21 
     22 static void freestr(str_t *s) { free(s); }
     23 
     24 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */
     25 static bool upb_isbetween(char c, char low, char high) {
     26   return c >= low && c <= high;
     27 }
     28 
     29 static bool upb_isletter(char c) {
     30   return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_';
     31 }
     32 
     33 static bool upb_isalphanum(char c) {
     34   return upb_isletter(c) || upb_isbetween(c, '0', '9');
     35 }
     36 
     37 static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) {
     38   bool start = true;
     39   size_t i;
     40   for (i = 0; i < len; i++) {
     41     char c = str[i];
     42     if (c == '.') {
     43       if (start || !full) {
     44         upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
     45         return false;
     46       }
     47       start = true;
     48     } else if (start) {
     49       if (!upb_isletter(c)) {
     50         upb_status_seterrf(
     51             s, "invalid name: path components must start with a letter (%s)",
     52             str);
     53         return false;
     54       }
     55       start = false;
     56     } else {
     57       if (!upb_isalphanum(c)) {
     58         upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
     59                            str);
     60         return false;
     61       }
     62     }
     63   }
     64   return !start;
     65 }
     66 
     67 
     68 /* upb_def ********************************************************************/
     69 
     70 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; }
     71 
     72 const char *upb_def_fullname(const upb_def *d) { return d->fullname; }
     73 
     74 bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) {
     75   assert(!upb_def_isfrozen(def));
     76   if (!upb_isident(fullname, strlen(fullname), true, s)) return false;
     77   free((void*)def->fullname);
     78   def->fullname = upb_strdup(fullname);
     79   return true;
     80 }
     81 
     82 upb_def *upb_def_dup(const upb_def *def, const void *o) {
     83   switch (def->type) {
     84     case UPB_DEF_MSG:
     85       return upb_msgdef_upcast_mutable(
     86           upb_msgdef_dup(upb_downcast_msgdef(def), o));
     87     case UPB_DEF_FIELD:
     88       return upb_fielddef_upcast_mutable(
     89           upb_fielddef_dup(upb_downcast_fielddef(def), o));
     90     case UPB_DEF_ENUM:
     91       return upb_enumdef_upcast_mutable(
     92           upb_enumdef_dup(upb_downcast_enumdef(def), o));
     93     default: assert(false); return NULL;
     94   }
     95 }
     96 
     97 static bool upb_def_init(upb_def *def, upb_deftype_t type,
     98                          const struct upb_refcounted_vtbl *vtbl,
     99                          const void *owner) {
    100   if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false;
    101   def->type = type;
    102   def->fullname = NULL;
    103   def->came_from_user = false;
    104   return true;
    105 }
    106 
    107 static void upb_def_uninit(upb_def *def) {
    108   free((void*)def->fullname);
    109 }
    110 
    111 static const char *msgdef_name(const upb_msgdef *m) {
    112   const char *name = upb_def_fullname(upb_msgdef_upcast(m));
    113   return name ? name : "(anonymous)";
    114 }
    115 
    116 static bool upb_validate_field(upb_fielddef *f, upb_status *s) {
    117   if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
    118     upb_status_seterrmsg(s, "fielddef must have name and number set");
    119     return false;
    120   }
    121 
    122   if (!f->type_is_set_) {
    123     upb_status_seterrmsg(s, "fielddef type was not initialized");
    124     return false;
    125   }
    126 
    127   if (upb_fielddef_lazy(f) &&
    128       upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) {
    129     upb_status_seterrmsg(s,
    130                          "only length-delimited submessage fields may be lazy");
    131     return false;
    132   }
    133 
    134   if (upb_fielddef_hassubdef(f)) {
    135     const upb_def *subdef;
    136 
    137     if (f->subdef_is_symbolic) {
    138       upb_status_seterrf(s, "field '%s.%s' has not been resolved",
    139                          msgdef_name(f->msg.def), upb_fielddef_name(f));
    140       return false;
    141     }
    142 
    143     subdef = upb_fielddef_subdef(f);
    144     if (subdef == NULL) {
    145       upb_status_seterrf(s, "field %s.%s is missing required subdef",
    146                          msgdef_name(f->msg.def), upb_fielddef_name(f));
    147       return false;
    148     }
    149 
    150     if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) {
    151       upb_status_seterrf(s,
    152                          "subdef of field %s.%s is not frozen or being frozen",
    153                          msgdef_name(f->msg.def), upb_fielddef_name(f));
    154       return false;
    155     }
    156   }
    157 
    158   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    159     bool has_default_name = upb_fielddef_enumhasdefaultstr(f);
    160     bool has_default_number = upb_fielddef_enumhasdefaultint32(f);
    161 
    162     /* Previously verified by upb_validate_enumdef(). */
    163     assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0);
    164 
    165     /* We've already validated that we have an associated enumdef and that it
    166      * has at least one member, so at least one of these should be true.
    167      * Because if the user didn't set anything, we'll pick up the enum's
    168      * default, but if the user *did* set something we should at least pick up
    169      * the one they set (int32 or string). */
    170     assert(has_default_name || has_default_number);
    171 
    172     if (!has_default_name) {
    173       upb_status_seterrf(s,
    174                          "enum default for field %s.%s (%d) is not in the enum",
    175                          msgdef_name(f->msg.def), upb_fielddef_name(f),
    176                          upb_fielddef_defaultint32(f));
    177       return false;
    178     }
    179 
    180     if (!has_default_number) {
    181       upb_status_seterrf(s,
    182                          "enum default for field %s.%s (%s) is not in the enum",
    183                          msgdef_name(f->msg.def), upb_fielddef_name(f),
    184                          upb_fielddef_defaultstr(f, NULL));
    185       return false;
    186     }
    187 
    188     /* Lift the effective numeric default into the field's default slot, in case
    189      * we were only getting it "by reference" from the enumdef. */
    190     upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f));
    191   }
    192 
    193   /* Ensure that MapEntry submessages only appear as repeated fields, not
    194    * optional/required (singular) fields. */
    195   if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
    196       upb_fielddef_msgsubdef(f) != NULL) {
    197     const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
    198     if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) {
    199       upb_status_seterrf(s,
    200                          "Field %s refers to mapentry message but is not "
    201                          "a repeated field",
    202                          upb_fielddef_name(f) ? upb_fielddef_name(f) :
    203                          "(unnamed)");
    204       return false;
    205     }
    206   }
    207 
    208   return true;
    209 }
    210 
    211 static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) {
    212   if (upb_enumdef_numvals(e) == 0) {
    213     upb_status_seterrf(s, "enum %s has no members (must have at least one)",
    214                        upb_enumdef_fullname(e));
    215     return false;
    216   }
    217 
    218   return true;
    219 }
    220 
    221 /* All submessage fields are lower than all other fields.
    222  * Secondly, fields are increasing in order. */
    223 uint32_t field_rank(const upb_fielddef *f) {
    224   uint32_t ret = upb_fielddef_number(f);
    225   const uint32_t high_bit = 1 << 30;
    226   assert(ret < high_bit);
    227   if (!upb_fielddef_issubmsg(f))
    228     ret |= high_bit;
    229   return ret;
    230 }
    231 
    232 int cmp_fields(const void *p1, const void *p2) {
    233   const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
    234   const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
    235   return field_rank(f1) - field_rank(f2);
    236 }
    237 
    238 static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
    239   /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
    240    * lowest indexes, but we do not publicly guarantee this. */
    241   upb_msg_field_iter j;
    242   int i;
    243   uint32_t selector;
    244   int n = upb_msgdef_numfields(m);
    245   upb_fielddef **fields = malloc(n * sizeof(*fields));
    246   if (!fields) return false;
    247 
    248   m->submsg_field_count = 0;
    249   for(i = 0, upb_msg_field_begin(&j, m);
    250       !upb_msg_field_done(&j);
    251       upb_msg_field_next(&j), i++) {
    252     upb_fielddef *f = upb_msg_iter_field(&j);
    253     assert(f->msg.def == m);
    254     if (!upb_validate_field(f, s)) {
    255       free(fields);
    256       return false;
    257     }
    258     if (upb_fielddef_issubmsg(f)) {
    259       m->submsg_field_count++;
    260     }
    261     fields[i] = f;
    262   }
    263 
    264   qsort(fields, n, sizeof(*fields), cmp_fields);
    265 
    266   selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
    267   for (i = 0; i < n; i++) {
    268     upb_fielddef *f = fields[i];
    269     f->index_ = i;
    270     f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
    271     selector += upb_handlers_selectorcount(f);
    272   }
    273   m->selector_count = selector;
    274 
    275 #ifndef NDEBUG
    276   {
    277     /* Verify that all selectors for the message are distinct. */
    278 #define TRY(type) \
    279     if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v);
    280 
    281     upb_inttable t;
    282     upb_value v;
    283     upb_selector_t sel;
    284 
    285     upb_inttable_init(&t, UPB_CTYPE_BOOL);
    286     v = upb_value_bool(true);
    287     upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v);
    288     upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v);
    289     for(upb_msg_field_begin(&j, m);
    290         !upb_msg_field_done(&j);
    291         upb_msg_field_next(&j)) {
    292       upb_fielddef *f = upb_msg_iter_field(&j);
    293       /* These calls will assert-fail in upb_table if the value already
    294        * exists. */
    295       TRY(UPB_HANDLER_INT32);
    296       TRY(UPB_HANDLER_INT64)
    297       TRY(UPB_HANDLER_UINT32)
    298       TRY(UPB_HANDLER_UINT64)
    299       TRY(UPB_HANDLER_FLOAT)
    300       TRY(UPB_HANDLER_DOUBLE)
    301       TRY(UPB_HANDLER_BOOL)
    302       TRY(UPB_HANDLER_STARTSTR)
    303       TRY(UPB_HANDLER_STRING)
    304       TRY(UPB_HANDLER_ENDSTR)
    305       TRY(UPB_HANDLER_STARTSUBMSG)
    306       TRY(UPB_HANDLER_ENDSUBMSG)
    307       TRY(UPB_HANDLER_STARTSEQ)
    308       TRY(UPB_HANDLER_ENDSEQ)
    309     }
    310     upb_inttable_uninit(&t);
    311   }
    312 #undef TRY
    313 #endif
    314 
    315   free(fields);
    316   return true;
    317 }
    318 
    319 bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) {
    320   int i;
    321   int maxdepth;
    322   bool ret;
    323   upb_status_clear(s);
    324 
    325   /* First perform validation, in two passes so we can check that we have a
    326    * transitive closure without needing to search. */
    327   for (i = 0; i < n; i++) {
    328     upb_def *def = defs[i];
    329     if (upb_def_isfrozen(def)) {
    330       /* Could relax this requirement if it's annoying. */
    331       upb_status_seterrmsg(s, "def is already frozen");
    332       goto err;
    333     } else if (def->type == UPB_DEF_FIELD) {
    334       upb_status_seterrmsg(s, "standalone fielddefs can not be frozen");
    335       goto err;
    336     } else if (def->type == UPB_DEF_ENUM) {
    337       if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) {
    338         goto err;
    339       }
    340     } else {
    341       /* Set now to detect transitive closure in the second pass. */
    342       def->came_from_user = true;
    343     }
    344   }
    345 
    346   /* Second pass of validation.  Also assign selector bases and indexes, and
    347    * compact tables. */
    348   for (i = 0; i < n; i++) {
    349     upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]);
    350     upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]);
    351     if (m) {
    352       upb_inttable_compact(&m->itof);
    353       if (!assign_msg_indices(m, s)) {
    354         goto err;
    355       }
    356     } else if (e) {
    357       upb_inttable_compact(&e->iton);
    358     }
    359   }
    360 
    361   /* Def graph contains FieldDefs between each MessageDef, so double the
    362    * limit. */
    363   maxdepth = UPB_MAX_MESSAGE_DEPTH * 2;
    364 
    365   /* Validation all passed; freeze the defs. */
    366   ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth);
    367   assert(!(s && ret != upb_ok(s)));
    368   return ret;
    369 
    370 err:
    371   for (i = 0; i < n; i++) {
    372     defs[i]->came_from_user = false;
    373   }
    374   assert(!(s && upb_ok(s)));
    375   return false;
    376 }
    377 
    378 
    379 /* upb_enumdef ****************************************************************/
    380 
    381 static void upb_enumdef_free(upb_refcounted *r) {
    382   upb_enumdef *e = (upb_enumdef*)r;
    383   upb_inttable_iter i;
    384   upb_inttable_begin(&i, &e->iton);
    385   for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    386     /* To clean up the upb_strdup() from upb_enumdef_addval(). */
    387     free(upb_value_getcstr(upb_inttable_iter_value(&i)));
    388   }
    389   upb_strtable_uninit(&e->ntoi);
    390   upb_inttable_uninit(&e->iton);
    391   upb_def_uninit(upb_enumdef_upcast_mutable(e));
    392   free(e);
    393 }
    394 
    395 upb_enumdef *upb_enumdef_new(const void *owner) {
    396   static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free};
    397   upb_enumdef *e = malloc(sizeof(*e));
    398   if (!e) return NULL;
    399   if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner))
    400     goto err2;
    401   if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2;
    402   if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1;
    403   return e;
    404 
    405 err1:
    406   upb_strtable_uninit(&e->ntoi);
    407 err2:
    408   free(e);
    409   return NULL;
    410 }
    411 
    412 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) {
    413   upb_enum_iter i;
    414   upb_enumdef *new_e = upb_enumdef_new(owner);
    415   if (!new_e) return NULL;
    416   for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) {
    417     bool success = upb_enumdef_addval(
    418         new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL);
    419     if (!success) {
    420       upb_enumdef_unref(new_e, owner);
    421       return NULL;
    422     }
    423   }
    424   return new_e;
    425 }
    426 
    427 bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) {
    428   upb_def *d = upb_enumdef_upcast_mutable(e);
    429   return upb_def_freeze(&d, 1, status);
    430 }
    431 
    432 const char *upb_enumdef_fullname(const upb_enumdef *e) {
    433   return upb_def_fullname(upb_enumdef_upcast(e));
    434 }
    435 
    436 bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname,
    437                              upb_status *s) {
    438   return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s);
    439 }
    440 
    441 bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num,
    442                         upb_status *status) {
    443   if (!upb_isident(name, strlen(name), false, status)) {
    444     return false;
    445   }
    446   if (upb_enumdef_ntoiz(e, name, NULL)) {
    447     upb_status_seterrf(status, "name '%s' is already defined", name);
    448     return false;
    449   }
    450   if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) {
    451     upb_status_seterrmsg(status, "out of memory");
    452     return false;
    453   }
    454   if (!upb_inttable_lookup(&e->iton, num, NULL) &&
    455       !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) {
    456     upb_status_seterrmsg(status, "out of memory");
    457     upb_strtable_remove(&e->ntoi, name, NULL);
    458     return false;
    459   }
    460   if (upb_enumdef_numvals(e) == 1) {
    461     bool ok = upb_enumdef_setdefault(e, num, NULL);
    462     UPB_ASSERT_VAR(ok, ok);
    463   }
    464   return true;
    465 }
    466 
    467 int32_t upb_enumdef_default(const upb_enumdef *e) {
    468   assert(upb_enumdef_iton(e, e->defaultval));
    469   return e->defaultval;
    470 }
    471 
    472 bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) {
    473   assert(!upb_enumdef_isfrozen(e));
    474   if (!upb_enumdef_iton(e, val)) {
    475     upb_status_seterrf(s, "number '%d' is not in the enum.", val);
    476     return false;
    477   }
    478   e->defaultval = val;
    479   return true;
    480 }
    481 
    482 int upb_enumdef_numvals(const upb_enumdef *e) {
    483   return upb_strtable_count(&e->ntoi);
    484 }
    485 
    486 void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
    487   /* We iterate over the ntoi table, to account for duplicate numbers. */
    488   upb_strtable_begin(i, &e->ntoi);
    489 }
    490 
    491 void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
    492 bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
    493 
    494 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
    495                       size_t len, int32_t *num) {
    496   upb_value v;
    497   if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
    498     return false;
    499   }
    500   if (num) *num = upb_value_getint32(v);
    501   return true;
    502 }
    503 
    504 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
    505   upb_value v;
    506   return upb_inttable_lookup32(&def->iton, num, &v) ?
    507       upb_value_getcstr(v) : NULL;
    508 }
    509 
    510 const char *upb_enum_iter_name(upb_enum_iter *iter) {
    511   return upb_strtable_iter_key(iter);
    512 }
    513 
    514 int32_t upb_enum_iter_number(upb_enum_iter *iter) {
    515   return upb_value_getint32(upb_strtable_iter_value(iter));
    516 }
    517 
    518 
    519 /* upb_fielddef ***************************************************************/
    520 
    521 static void upb_fielddef_init_default(upb_fielddef *f);
    522 
    523 static void upb_fielddef_uninit_default(upb_fielddef *f) {
    524   if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes)
    525     freestr(f->defaultval.bytes);
    526 }
    527 
    528 static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit,
    529                        void *closure) {
    530   const upb_fielddef *f = (const upb_fielddef*)r;
    531   if (upb_fielddef_containingtype(f)) {
    532     visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure);
    533   }
    534   if (upb_fielddef_containingoneof(f)) {
    535     visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure);
    536   }
    537   if (upb_fielddef_subdef(f)) {
    538     visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure);
    539   }
    540 }
    541 
    542 static void freefield(upb_refcounted *r) {
    543   upb_fielddef *f = (upb_fielddef*)r;
    544   upb_fielddef_uninit_default(f);
    545   if (f->subdef_is_symbolic)
    546     free(f->sub.name);
    547   upb_def_uninit(upb_fielddef_upcast_mutable(f));
    548   free(f);
    549 }
    550 
    551 static const char *enumdefaultstr(const upb_fielddef *f) {
    552   const upb_enumdef *e;
    553   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
    554   e = upb_fielddef_enumsubdef(f);
    555   if (f->default_is_string && f->defaultval.bytes) {
    556     /* Default was explicitly set as a string. */
    557     str_t *s = f->defaultval.bytes;
    558     return s->str;
    559   } else if (e) {
    560     if (!f->default_is_string) {
    561       /* Default was explicitly set as an integer; look it up in enumdef. */
    562       const char *name = upb_enumdef_iton(e, f->defaultval.sint);
    563       if (name) {
    564         return name;
    565       }
    566     } else {
    567       /* Default is completely unset; pull enumdef default. */
    568       if (upb_enumdef_numvals(e) > 0) {
    569         const char *name = upb_enumdef_iton(e, upb_enumdef_default(e));
    570         assert(name);
    571         return name;
    572       }
    573     }
    574   }
    575   return NULL;
    576 }
    577 
    578 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) {
    579   const upb_enumdef *e;
    580   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
    581   e = upb_fielddef_enumsubdef(f);
    582   if (!f->default_is_string) {
    583     /* Default was explicitly set as an integer. */
    584     *val = f->defaultval.sint;
    585     return true;
    586   } else if (e) {
    587     if (f->defaultval.bytes) {
    588       /* Default was explicitly set as a str; try to lookup corresponding int. */
    589       str_t *s = f->defaultval.bytes;
    590       if (upb_enumdef_ntoiz(e, s->str, val)) {
    591         return true;
    592       }
    593     } else {
    594       /* Default is unset; try to pull in enumdef default. */
    595       if (upb_enumdef_numvals(e) > 0) {
    596         *val = upb_enumdef_default(e);
    597         return true;
    598       }
    599     }
    600   }
    601   return false;
    602 }
    603 
    604 upb_fielddef *upb_fielddef_new(const void *o) {
    605   static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield};
    606   upb_fielddef *f = malloc(sizeof(*f));
    607   if (!f) return NULL;
    608   if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) {
    609     free(f);
    610     return NULL;
    611   }
    612   f->msg.def = NULL;
    613   f->sub.def = NULL;
    614   f->oneof = NULL;
    615   f->subdef_is_symbolic = false;
    616   f->msg_is_symbolic = false;
    617   f->label_ = UPB_LABEL_OPTIONAL;
    618   f->type_ = UPB_TYPE_INT32;
    619   f->number_ = 0;
    620   f->type_is_set_ = false;
    621   f->tagdelim = false;
    622   f->is_extension_ = false;
    623   f->lazy_ = false;
    624   f->packed_ = true;
    625 
    626   /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work
    627    * with all integer types and is in some since more "default" since the most
    628    * normal-looking proto2 types int32/int64/uint32/uint64 use variable.
    629    *
    630    * Other options to consider:
    631    * - there is no default; users must set this manually (like type).
    632    * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to
    633    *   be an optimal default for signed integers. */
    634   f->intfmt = UPB_INTFMT_VARIABLE;
    635   return f;
    636 }
    637 
    638 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) {
    639   const char *srcname;
    640   upb_fielddef *newf = upb_fielddef_new(owner);
    641   if (!newf) return NULL;
    642   upb_fielddef_settype(newf, upb_fielddef_type(f));
    643   upb_fielddef_setlabel(newf, upb_fielddef_label(f));
    644   upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL);
    645   upb_fielddef_setname(newf, upb_fielddef_name(f), NULL);
    646   if (f->default_is_string && f->defaultval.bytes) {
    647     str_t *s = f->defaultval.bytes;
    648     upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL);
    649   } else {
    650     newf->default_is_string = f->default_is_string;
    651     newf->defaultval = f->defaultval;
    652   }
    653 
    654   if (f->subdef_is_symbolic) {
    655     srcname = f->sub.name;  /* Might be NULL. */
    656   } else {
    657     srcname = f->sub.def ? upb_def_fullname(f->sub.def) : NULL;
    658   }
    659   if (srcname) {
    660     char *newname = malloc(strlen(f->sub.def->fullname) + 2);
    661     if (!newname) {
    662       upb_fielddef_unref(newf, owner);
    663       return NULL;
    664     }
    665     strcpy(newname, ".");
    666     strcat(newname, f->sub.def->fullname);
    667     upb_fielddef_setsubdefname(newf, newname, NULL);
    668     free(newname);
    669   }
    670 
    671   return newf;
    672 }
    673 
    674 bool upb_fielddef_typeisset(const upb_fielddef *f) {
    675   return f->type_is_set_;
    676 }
    677 
    678 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
    679   assert(f->type_is_set_);
    680   return f->type_;
    681 }
    682 
    683 uint32_t upb_fielddef_index(const upb_fielddef *f) {
    684   return f->index_;
    685 }
    686 
    687 upb_label_t upb_fielddef_label(const upb_fielddef *f) {
    688   return f->label_;
    689 }
    690 
    691 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) {
    692   return f->intfmt;
    693 }
    694 
    695 bool upb_fielddef_istagdelim(const upb_fielddef *f) {
    696   return f->tagdelim;
    697 }
    698 
    699 uint32_t upb_fielddef_number(const upb_fielddef *f) {
    700   return f->number_;
    701 }
    702 
    703 bool upb_fielddef_isextension(const upb_fielddef *f) {
    704   return f->is_extension_;
    705 }
    706 
    707 bool upb_fielddef_lazy(const upb_fielddef *f) {
    708   return f->lazy_;
    709 }
    710 
    711 bool upb_fielddef_packed(const upb_fielddef *f) {
    712   return f->packed_;
    713 }
    714 
    715 const char *upb_fielddef_name(const upb_fielddef *f) {
    716   return upb_def_fullname(upb_fielddef_upcast(f));
    717 }
    718 
    719 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
    720   return f->msg_is_symbolic ? NULL : f->msg.def;
    721 }
    722 
    723 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
    724   return f->oneof;
    725 }
    726 
    727 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) {
    728   return (upb_msgdef*)upb_fielddef_containingtype(f);
    729 }
    730 
    731 const char *upb_fielddef_containingtypename(upb_fielddef *f) {
    732   return f->msg_is_symbolic ? f->msg.name : NULL;
    733 }
    734 
    735 static void release_containingtype(upb_fielddef *f) {
    736   if (f->msg_is_symbolic) free(f->msg.name);
    737 }
    738 
    739 bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name,
    740                                         upb_status *s) {
    741   assert(!upb_fielddef_isfrozen(f));
    742   if (upb_fielddef_containingtype(f)) {
    743     upb_status_seterrmsg(s, "field has already been added to a message.");
    744     return false;
    745   }
    746   /* TODO: validate name (upb_isident() doesn't quite work atm because this name
    747    * may have a leading "."). */
    748   release_containingtype(f);
    749   f->msg.name = upb_strdup(name);
    750   f->msg_is_symbolic = true;
    751   return true;
    752 }
    753 
    754 bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) {
    755   if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) {
    756     upb_status_seterrmsg(s, "Already added to message or oneof");
    757     return false;
    758   }
    759   return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s);
    760 }
    761 
    762 static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) {
    763   UPB_UNUSED(f);
    764   UPB_UNUSED(type);
    765   assert(f->type_is_set_ && upb_fielddef_type(f) == type);
    766 }
    767 
    768 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
    769   chkdefaulttype(f, UPB_TYPE_INT64);
    770   return f->defaultval.sint;
    771 }
    772 
    773 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
    774   if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    775     int32_t val;
    776     bool ok = enumdefaultint32(f, &val);
    777     UPB_ASSERT_VAR(ok, ok);
    778     return val;
    779   } else {
    780     chkdefaulttype(f, UPB_TYPE_INT32);
    781     return f->defaultval.sint;
    782   }
    783 }
    784 
    785 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
    786   chkdefaulttype(f, UPB_TYPE_UINT64);
    787   return f->defaultval.uint;
    788 }
    789 
    790 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
    791   chkdefaulttype(f, UPB_TYPE_UINT32);
    792   return f->defaultval.uint;
    793 }
    794 
    795 bool upb_fielddef_defaultbool(const upb_fielddef *f) {
    796   chkdefaulttype(f, UPB_TYPE_BOOL);
    797   return f->defaultval.uint;
    798 }
    799 
    800 float upb_fielddef_defaultfloat(const upb_fielddef *f) {
    801   chkdefaulttype(f, UPB_TYPE_FLOAT);
    802   return f->defaultval.flt;
    803 }
    804 
    805 double upb_fielddef_defaultdouble(const upb_fielddef *f) {
    806   chkdefaulttype(f, UPB_TYPE_DOUBLE);
    807   return f->defaultval.dbl;
    808 }
    809 
    810 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
    811   assert(f->type_is_set_);
    812   assert(upb_fielddef_type(f) == UPB_TYPE_STRING ||
    813          upb_fielddef_type(f) == UPB_TYPE_BYTES ||
    814          upb_fielddef_type(f) == UPB_TYPE_ENUM);
    815 
    816   if (upb_fielddef_type(f) == UPB_TYPE_ENUM) {
    817     const char *ret = enumdefaultstr(f);
    818     assert(ret);
    819     /* Enum defaults can't have embedded NULLs. */
    820     if (len) *len = strlen(ret);
    821     return ret;
    822   }
    823 
    824   if (f->default_is_string) {
    825     str_t *str = f->defaultval.bytes;
    826     if (len) *len = str->len;
    827     return str->str;
    828   }
    829 
    830   return NULL;
    831 }
    832 
    833 static void upb_fielddef_init_default(upb_fielddef *f) {
    834   f->default_is_string = false;
    835   switch (upb_fielddef_type(f)) {
    836     case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break;
    837     case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break;
    838     case UPB_TYPE_INT32:
    839     case UPB_TYPE_INT64: f->defaultval.sint = 0; break;
    840     case UPB_TYPE_UINT64:
    841     case UPB_TYPE_UINT32:
    842     case UPB_TYPE_BOOL: f->defaultval.uint = 0; break;
    843     case UPB_TYPE_STRING:
    844     case UPB_TYPE_BYTES:
    845       f->defaultval.bytes = newstr("", 0);
    846       f->default_is_string = true;
    847       break;
    848     case UPB_TYPE_MESSAGE: break;
    849     case UPB_TYPE_ENUM:
    850       /* This is our special sentinel that indicates "not set" for an enum. */
    851       f->default_is_string = true;
    852       f->defaultval.bytes = NULL;
    853       break;
    854   }
    855 }
    856 
    857 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) {
    858   return f->subdef_is_symbolic ? NULL : f->sub.def;
    859 }
    860 
    861 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
    862   const upb_def *def = upb_fielddef_subdef(f);
    863   return def ? upb_dyncast_msgdef(def) : NULL;
    864 }
    865 
    866 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
    867   const upb_def *def = upb_fielddef_subdef(f);
    868   return def ? upb_dyncast_enumdef(def) : NULL;
    869 }
    870 
    871 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) {
    872   return (upb_def*)upb_fielddef_subdef(f);
    873 }
    874 
    875 const char *upb_fielddef_subdefname(const upb_fielddef *f) {
    876   if (f->subdef_is_symbolic) {
    877     return f->sub.name;
    878   } else if (f->sub.def) {
    879     return upb_def_fullname(f->sub.def);
    880   } else {
    881     return NULL;
    882   }
    883 }
    884 
    885 bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) {
    886   if (upb_fielddef_containingtype(f)) {
    887     upb_status_seterrmsg(
    888         s, "cannot change field number after adding to a message");
    889     return false;
    890   }
    891   if (number == 0 || number > UPB_MAX_FIELDNUMBER) {
    892     upb_status_seterrf(s, "invalid field number (%u)", number);
    893     return false;
    894   }
    895   f->number_ = number;
    896   return true;
    897 }
    898 
    899 void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) {
    900   assert(!upb_fielddef_isfrozen(f));
    901   assert(upb_fielddef_checktype(type));
    902   upb_fielddef_uninit_default(f);
    903   f->type_ = type;
    904   f->type_is_set_ = true;
    905   upb_fielddef_init_default(f);
    906 }
    907 
    908 void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) {
    909   assert(!upb_fielddef_isfrozen(f));
    910   switch (type) {
    911     case UPB_DESCRIPTOR_TYPE_DOUBLE:
    912       upb_fielddef_settype(f, UPB_TYPE_DOUBLE);
    913       break;
    914     case UPB_DESCRIPTOR_TYPE_FLOAT:
    915       upb_fielddef_settype(f, UPB_TYPE_FLOAT);
    916       break;
    917     case UPB_DESCRIPTOR_TYPE_INT64:
    918     case UPB_DESCRIPTOR_TYPE_SFIXED64:
    919     case UPB_DESCRIPTOR_TYPE_SINT64:
    920       upb_fielddef_settype(f, UPB_TYPE_INT64);
    921       break;
    922     case UPB_DESCRIPTOR_TYPE_UINT64:
    923     case UPB_DESCRIPTOR_TYPE_FIXED64:
    924       upb_fielddef_settype(f, UPB_TYPE_UINT64);
    925       break;
    926     case UPB_DESCRIPTOR_TYPE_INT32:
    927     case UPB_DESCRIPTOR_TYPE_SFIXED32:
    928     case UPB_DESCRIPTOR_TYPE_SINT32:
    929       upb_fielddef_settype(f, UPB_TYPE_INT32);
    930       break;
    931     case UPB_DESCRIPTOR_TYPE_UINT32:
    932     case UPB_DESCRIPTOR_TYPE_FIXED32:
    933       upb_fielddef_settype(f, UPB_TYPE_UINT32);
    934       break;
    935     case UPB_DESCRIPTOR_TYPE_BOOL:
    936       upb_fielddef_settype(f, UPB_TYPE_BOOL);
    937       break;
    938     case UPB_DESCRIPTOR_TYPE_STRING:
    939       upb_fielddef_settype(f, UPB_TYPE_STRING);
    940       break;
    941     case UPB_DESCRIPTOR_TYPE_BYTES:
    942       upb_fielddef_settype(f, UPB_TYPE_BYTES);
    943       break;
    944     case UPB_DESCRIPTOR_TYPE_GROUP:
    945     case UPB_DESCRIPTOR_TYPE_MESSAGE:
    946       upb_fielddef_settype(f, UPB_TYPE_MESSAGE);
    947       break;
    948     case UPB_DESCRIPTOR_TYPE_ENUM:
    949       upb_fielddef_settype(f, UPB_TYPE_ENUM);
    950       break;
    951     default: assert(false);
    952   }
    953 
    954   if (type == UPB_DESCRIPTOR_TYPE_FIXED64 ||
    955       type == UPB_DESCRIPTOR_TYPE_FIXED32 ||
    956       type == UPB_DESCRIPTOR_TYPE_SFIXED64 ||
    957       type == UPB_DESCRIPTOR_TYPE_SFIXED32) {
    958     upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED);
    959   } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 ||
    960              type == UPB_DESCRIPTOR_TYPE_SINT32) {
    961     upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG);
    962   } else {
    963     upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE);
    964   }
    965 
    966   upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP);
    967 }
    968 
    969 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
    970   switch (upb_fielddef_type(f)) {
    971     case UPB_TYPE_FLOAT:  return UPB_DESCRIPTOR_TYPE_FLOAT;
    972     case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE;
    973     case UPB_TYPE_BOOL:   return UPB_DESCRIPTOR_TYPE_BOOL;
    974     case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING;
    975     case UPB_TYPE_BYTES:  return UPB_DESCRIPTOR_TYPE_BYTES;
    976     case UPB_TYPE_ENUM:   return UPB_DESCRIPTOR_TYPE_ENUM;
    977     case UPB_TYPE_INT32:
    978       switch (upb_fielddef_intfmt(f)) {
    979         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32;
    980         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED32;
    981         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT32;
    982       }
    983     case UPB_TYPE_INT64:
    984       switch (upb_fielddef_intfmt(f)) {
    985         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64;
    986         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_SFIXED64;
    987         case UPB_INTFMT_ZIGZAG:   return UPB_DESCRIPTOR_TYPE_SINT64;
    988       }
    989     case UPB_TYPE_UINT32:
    990       switch (upb_fielddef_intfmt(f)) {
    991         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32;
    992         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED32;
    993         case UPB_INTFMT_ZIGZAG:   return -1;
    994       }
    995     case UPB_TYPE_UINT64:
    996       switch (upb_fielddef_intfmt(f)) {
    997         case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64;
    998         case UPB_INTFMT_FIXED:    return UPB_DESCRIPTOR_TYPE_FIXED64;
    999         case UPB_INTFMT_ZIGZAG:   return -1;
   1000       }
   1001     case UPB_TYPE_MESSAGE:
   1002       return upb_fielddef_istagdelim(f) ?
   1003           UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE;
   1004   }
   1005   return 0;
   1006 }
   1007 
   1008 void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) {
   1009   assert(!upb_fielddef_isfrozen(f));
   1010   f->is_extension_ = is_extension;
   1011 }
   1012 
   1013 void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) {
   1014   assert(!upb_fielddef_isfrozen(f));
   1015   f->lazy_ = lazy;
   1016 }
   1017 
   1018 void upb_fielddef_setpacked(upb_fielddef *f, bool packed) {
   1019   assert(!upb_fielddef_isfrozen(f));
   1020   f->packed_ = packed;
   1021 }
   1022 
   1023 void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) {
   1024   assert(!upb_fielddef_isfrozen(f));
   1025   assert(upb_fielddef_checklabel(label));
   1026   f->label_ = label;
   1027 }
   1028 
   1029 void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) {
   1030   assert(!upb_fielddef_isfrozen(f));
   1031   assert(upb_fielddef_checkintfmt(fmt));
   1032   f->intfmt = fmt;
   1033 }
   1034 
   1035 void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) {
   1036   assert(!upb_fielddef_isfrozen(f));
   1037   f->tagdelim = tag_delim;
   1038   f->tagdelim = tag_delim;
   1039 }
   1040 
   1041 static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) {
   1042   if (!f->type_is_set_ || upb_fielddef_isfrozen(f) ||
   1043       upb_fielddef_type(f) != type) {
   1044     assert(false);
   1045     return false;
   1046   }
   1047   if (f->default_is_string) {
   1048     str_t *s = f->defaultval.bytes;
   1049     assert(s || type == UPB_TYPE_ENUM);
   1050     if (s) freestr(s);
   1051   }
   1052   f->default_is_string = false;
   1053   return true;
   1054 }
   1055 
   1056 void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) {
   1057   if (checksetdefault(f, UPB_TYPE_INT64))
   1058     f->defaultval.sint = value;
   1059 }
   1060 
   1061 void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) {
   1062   if ((upb_fielddef_type(f) == UPB_TYPE_ENUM &&
   1063        checksetdefault(f, UPB_TYPE_ENUM)) ||
   1064       checksetdefault(f, UPB_TYPE_INT32)) {
   1065     f->defaultval.sint = value;
   1066   }
   1067 }
   1068 
   1069 void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) {
   1070   if (checksetdefault(f, UPB_TYPE_UINT64))
   1071     f->defaultval.uint = value;
   1072 }
   1073 
   1074 void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) {
   1075   if (checksetdefault(f, UPB_TYPE_UINT32))
   1076     f->defaultval.uint = value;
   1077 }
   1078 
   1079 void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) {
   1080   if (checksetdefault(f, UPB_TYPE_BOOL))
   1081     f->defaultval.uint = value;
   1082 }
   1083 
   1084 void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) {
   1085   if (checksetdefault(f, UPB_TYPE_FLOAT))
   1086     f->defaultval.flt = value;
   1087 }
   1088 
   1089 void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) {
   1090   if (checksetdefault(f, UPB_TYPE_DOUBLE))
   1091     f->defaultval.dbl = value;
   1092 }
   1093 
   1094 bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len,
   1095                                 upb_status *s) {
   1096   str_t *str2;
   1097   assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM);
   1098   if (f->type_ == UPB_TYPE_ENUM && !upb_isident(str, len, false, s))
   1099     return false;
   1100 
   1101   if (f->default_is_string) {
   1102     str_t *s = f->defaultval.bytes;
   1103     assert(s || f->type_ == UPB_TYPE_ENUM);
   1104     if (s) freestr(s);
   1105   } else {
   1106     assert(f->type_ == UPB_TYPE_ENUM);
   1107   }
   1108 
   1109   str2 = newstr(str, len);
   1110   f->defaultval.bytes = str2;
   1111   f->default_is_string = true;
   1112   return true;
   1113 }
   1114 
   1115 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str,
   1116                                  upb_status *s) {
   1117   assert(f->type_is_set_);
   1118   upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s);
   1119 }
   1120 
   1121 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) {
   1122   int32_t val;
   1123   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
   1124   return enumdefaultint32(f, &val);
   1125 }
   1126 
   1127 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) {
   1128   assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM);
   1129   return enumdefaultstr(f) != NULL;
   1130 }
   1131 
   1132 static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef,
   1133                                  upb_status *s) {
   1134   if (f->type_ == UPB_TYPE_MESSAGE) {
   1135     if (upb_dyncast_msgdef(subdef)) return true;
   1136     upb_status_seterrmsg(s, "invalid subdef type for this submessage field");
   1137     return false;
   1138   } else if (f->type_ == UPB_TYPE_ENUM) {
   1139     if (upb_dyncast_enumdef(subdef)) return true;
   1140     upb_status_seterrmsg(s, "invalid subdef type for this enum field");
   1141     return false;
   1142   } else {
   1143     upb_status_seterrmsg(s, "only message and enum fields can have a subdef");
   1144     return false;
   1145   }
   1146 }
   1147 
   1148 static void release_subdef(upb_fielddef *f) {
   1149   if (f->subdef_is_symbolic) {
   1150     free(f->sub.name);
   1151   } else if (f->sub.def) {
   1152     upb_unref2(f->sub.def, f);
   1153   }
   1154 }
   1155 
   1156 bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef,
   1157                             upb_status *s) {
   1158   assert(!upb_fielddef_isfrozen(f));
   1159   assert(upb_fielddef_hassubdef(f));
   1160   if (subdef && !upb_subdef_typecheck(f, subdef, s)) return false;
   1161   release_subdef(f);
   1162   f->sub.def = subdef;
   1163   f->subdef_is_symbolic = false;
   1164   if (f->sub.def) upb_ref2(f->sub.def, f);
   1165   return true;
   1166 }
   1167 
   1168 bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef,
   1169                                upb_status *s) {
   1170   return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s);
   1171 }
   1172 
   1173 bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef,
   1174                                 upb_status *s) {
   1175   return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s);
   1176 }
   1177 
   1178 bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name,
   1179                                 upb_status *s) {
   1180   assert(!upb_fielddef_isfrozen(f));
   1181   if (!upb_fielddef_hassubdef(f)) {
   1182     upb_status_seterrmsg(s, "field type does not accept a subdef");
   1183     return false;
   1184   }
   1185   /* TODO: validate name (upb_isident() doesn't quite work atm because this name
   1186    * may have a leading "."). */
   1187   release_subdef(f);
   1188   f->sub.name = upb_strdup(name);
   1189   f->subdef_is_symbolic = true;
   1190   return true;
   1191 }
   1192 
   1193 bool upb_fielddef_issubmsg(const upb_fielddef *f) {
   1194   return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
   1195 }
   1196 
   1197 bool upb_fielddef_isstring(const upb_fielddef *f) {
   1198   return upb_fielddef_type(f) == UPB_TYPE_STRING ||
   1199          upb_fielddef_type(f) == UPB_TYPE_BYTES;
   1200 }
   1201 
   1202 bool upb_fielddef_isseq(const upb_fielddef *f) {
   1203   return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
   1204 }
   1205 
   1206 bool upb_fielddef_isprimitive(const upb_fielddef *f) {
   1207   return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
   1208 }
   1209 
   1210 bool upb_fielddef_ismap(const upb_fielddef *f) {
   1211   return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
   1212          upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
   1213 }
   1214 
   1215 bool upb_fielddef_hassubdef(const upb_fielddef *f) {
   1216   return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
   1217 }
   1218 
   1219 static bool between(int32_t x, int32_t low, int32_t high) {
   1220   return x >= low && x <= high;
   1221 }
   1222 
   1223 bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); }
   1224 bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); }
   1225 bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); }
   1226 
   1227 bool upb_fielddef_checkdescriptortype(int32_t type) {
   1228   return between(type, 1, 18);
   1229 }
   1230 
   1231 /* upb_msgdef *****************************************************************/
   1232 
   1233 static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit,
   1234                      void *closure) {
   1235   upb_msg_oneof_iter o;
   1236   const upb_msgdef *m = (const upb_msgdef*)r;
   1237   upb_msg_field_iter i;
   1238   for(upb_msg_field_begin(&i, m);
   1239       !upb_msg_field_done(&i);
   1240       upb_msg_field_next(&i)) {
   1241     upb_fielddef *f = upb_msg_iter_field(&i);
   1242     visit(r, upb_fielddef_upcast2(f), closure);
   1243   }
   1244   for(upb_msg_oneof_begin(&o, m);
   1245       !upb_msg_oneof_done(&o);
   1246       upb_msg_oneof_next(&o)) {
   1247     upb_oneofdef *f = upb_msg_iter_oneof(&o);
   1248     visit(r, upb_oneofdef_upcast2(f), closure);
   1249   }
   1250 }
   1251 
   1252 static void freemsg(upb_refcounted *r) {
   1253   upb_msgdef *m = (upb_msgdef*)r;
   1254   upb_strtable_uninit(&m->ntoo);
   1255   upb_strtable_uninit(&m->ntof);
   1256   upb_inttable_uninit(&m->itof);
   1257   upb_def_uninit(upb_msgdef_upcast_mutable(m));
   1258   free(m);
   1259 }
   1260 
   1261 upb_msgdef *upb_msgdef_new(const void *owner) {
   1262   static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg};
   1263   upb_msgdef *m = malloc(sizeof(*m));
   1264   if (!m) return NULL;
   1265   if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner))
   1266     goto err2;
   1267   if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3;
   1268   if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2;
   1269   if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1;
   1270   m->map_entry = false;
   1271   return m;
   1272 
   1273 err1:
   1274   upb_strtable_uninit(&m->ntof);
   1275 err2:
   1276   upb_inttable_uninit(&m->itof);
   1277 err3:
   1278   free(m);
   1279   return NULL;
   1280 }
   1281 
   1282 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) {
   1283   bool ok;
   1284   upb_msg_field_iter i;
   1285   upb_msg_oneof_iter o;
   1286 
   1287   upb_msgdef *newm = upb_msgdef_new(owner);
   1288   if (!newm) return NULL;
   1289   ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm),
   1290                            upb_def_fullname(upb_msgdef_upcast(m)),
   1291                            NULL);
   1292   newm->map_entry = m->map_entry;
   1293   UPB_ASSERT_VAR(ok, ok);
   1294   for(upb_msg_field_begin(&i, m);
   1295       !upb_msg_field_done(&i);
   1296       upb_msg_field_next(&i)) {
   1297     upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f);
   1298     /* Fields in oneofs are dup'd below. */
   1299     if (upb_fielddef_containingoneof(f)) continue;
   1300     if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) {
   1301       upb_msgdef_unref(newm, owner);
   1302       return NULL;
   1303     }
   1304   }
   1305   for(upb_msg_oneof_begin(&o, m);
   1306       !upb_msg_oneof_done(&o);
   1307       upb_msg_oneof_next(&o)) {
   1308     upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f);
   1309     if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) {
   1310       upb_msgdef_unref(newm, owner);
   1311       return NULL;
   1312     }
   1313   }
   1314   return newm;
   1315 }
   1316 
   1317 bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) {
   1318   upb_def *d = upb_msgdef_upcast_mutable(m);
   1319   return upb_def_freeze(&d, 1, status);
   1320 }
   1321 
   1322 const char *upb_msgdef_fullname(const upb_msgdef *m) {
   1323   return upb_def_fullname(upb_msgdef_upcast(m));
   1324 }
   1325 
   1326 bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname,
   1327                             upb_status *s) {
   1328   return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s);
   1329 }
   1330 
   1331 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error
   1332  * on status |s| and return false if not. */
   1333 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f,
   1334                             upb_status *s) {
   1335   if (upb_fielddef_containingtype(f) != NULL) {
   1336     upb_status_seterrmsg(s, "fielddef already belongs to a message");
   1337     return false;
   1338   } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
   1339     upb_status_seterrmsg(s, "field name or number were not set");
   1340     return false;
   1341   } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) ||
   1342              upb_msgdef_itof(m, upb_fielddef_number(f))) {
   1343     upb_status_seterrmsg(s, "duplicate field name or number for field");
   1344     return false;
   1345   }
   1346   return true;
   1347 }
   1348 
   1349 static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) {
   1350   release_containingtype(f);
   1351   f->msg.def = m;
   1352   f->msg_is_symbolic = false;
   1353   upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f));
   1354   upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f));
   1355   upb_ref2(f, m);
   1356   upb_ref2(m, f);
   1357   if (ref_donor) upb_fielddef_unref(f, ref_donor);
   1358 }
   1359 
   1360 bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor,
   1361                          upb_status *s) {
   1362   /* TODO: extensions need to have a separate namespace, because proto2 allows a
   1363    * top-level extension (ie. one not in any package) to have the same name as a
   1364    * field from the message.
   1365    *
   1366    * This also implies that there needs to be a separate lookup-by-name method
   1367    * for extensions.  It seems desirable for iteration to return both extensions
   1368    * and non-extensions though.
   1369    *
   1370    * We also need to validate that the field number is in an extension range iff
   1371    * it is an extension.
   1372    *
   1373    * This method is idempotent. Check if |f| is already part of this msgdef and
   1374    * return immediately if so. */
   1375   if (upb_fielddef_containingtype(f) == m) {
   1376     return true;
   1377   }
   1378 
   1379   /* Check constraints for all fields before performing any action. */
   1380   if (!check_field_add(m, f, s)) {
   1381     return false;
   1382   } else if (upb_fielddef_containingoneof(f) != NULL) {
   1383     /* Fields in a oneof can only be added by adding the oneof to the msgdef. */
   1384     upb_status_seterrmsg(s, "fielddef is part of a oneof");
   1385     return false;
   1386   }
   1387 
   1388   /* Constraint checks ok, perform the action. */
   1389   add_field(m, f, ref_donor);
   1390   return true;
   1391 }
   1392 
   1393 bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor,
   1394                          upb_status *s) {
   1395   upb_oneof_iter it;
   1396 
   1397   /* Check various conditions that would prevent this oneof from being added. */
   1398   if (upb_oneofdef_containingtype(o)) {
   1399     upb_status_seterrmsg(s, "oneofdef already belongs to a message");
   1400     return false;
   1401   } else if (upb_oneofdef_name(o) == NULL) {
   1402     upb_status_seterrmsg(s, "oneofdef name was not set");
   1403     return false;
   1404   } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) {
   1405     upb_status_seterrmsg(s, "duplicate oneof name");
   1406     return false;
   1407   }
   1408 
   1409   /* Check that all of the oneof's fields do not conflict with names or numbers
   1410    * of fields already in the message. */
   1411   for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
   1412     const upb_fielddef *f = upb_oneof_iter_field(&it);
   1413     if (!check_field_add(m, f, s)) {
   1414       return false;
   1415     }
   1416   }
   1417 
   1418   /* Everything checks out -- commit now. */
   1419 
   1420   /* Add oneof itself first. */
   1421   o->parent = m;
   1422   upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o));
   1423   upb_ref2(o, m);
   1424   upb_ref2(m, o);
   1425 
   1426   /* Add each field of the oneof directly to the msgdef. */
   1427   for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) {
   1428     upb_fielddef *f = upb_oneof_iter_field(&it);
   1429     add_field(m, f, NULL);
   1430   }
   1431 
   1432   if (ref_donor) upb_oneofdef_unref(o, ref_donor);
   1433 
   1434   return true;
   1435 }
   1436 
   1437 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
   1438   upb_value val;
   1439   return upb_inttable_lookup32(&m->itof, i, &val) ?
   1440       upb_value_getptr(val) : NULL;
   1441 }
   1442 
   1443 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
   1444                                     size_t len) {
   1445   upb_value val;
   1446   return upb_strtable_lookup2(&m->ntof, name, len, &val) ?
   1447       upb_value_getptr(val) : NULL;
   1448 }
   1449 
   1450 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
   1451                                     size_t len) {
   1452   upb_value val;
   1453   return upb_strtable_lookup2(&m->ntoo, name, len, &val) ?
   1454       upb_value_getptr(val) : NULL;
   1455 }
   1456 
   1457 int upb_msgdef_numfields(const upb_msgdef *m) {
   1458   return upb_strtable_count(&m->ntof);
   1459 }
   1460 
   1461 int upb_msgdef_numoneofs(const upb_msgdef *m) {
   1462   return upb_strtable_count(&m->ntoo);
   1463 }
   1464 
   1465 void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) {
   1466   assert(!upb_msgdef_isfrozen(m));
   1467   m->map_entry = map_entry;
   1468 }
   1469 
   1470 bool upb_msgdef_mapentry(const upb_msgdef *m) {
   1471   return m->map_entry;
   1472 }
   1473 
   1474 void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
   1475   upb_inttable_begin(iter, &m->itof);
   1476 }
   1477 
   1478 void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }
   1479 
   1480 bool upb_msg_field_done(const upb_msg_field_iter *iter) {
   1481   return upb_inttable_done(iter);
   1482 }
   1483 
   1484 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
   1485   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
   1486 }
   1487 
   1488 void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
   1489   upb_inttable_iter_setdone(iter);
   1490 }
   1491 
   1492 void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
   1493   upb_strtable_begin(iter, &m->ntoo);
   1494 }
   1495 
   1496 void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); }
   1497 
   1498 bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
   1499   return upb_strtable_done(iter);
   1500 }
   1501 
   1502 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
   1503   return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter));
   1504 }
   1505 
   1506 void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
   1507   upb_strtable_iter_setdone(iter);
   1508 }
   1509 
   1510 /* upb_oneofdef ***************************************************************/
   1511 
   1512 static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit,
   1513                        void *closure) {
   1514   const upb_oneofdef *o = (const upb_oneofdef*)r;
   1515   upb_oneof_iter i;
   1516   for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
   1517     const upb_fielddef *f = upb_oneof_iter_field(&i);
   1518     visit(r, upb_fielddef_upcast2(f), closure);
   1519   }
   1520   if (o->parent) {
   1521     visit(r, upb_msgdef_upcast2(o->parent), closure);
   1522   }
   1523 }
   1524 
   1525 static void freeoneof(upb_refcounted *r) {
   1526   upb_oneofdef *o = (upb_oneofdef*)r;
   1527   upb_strtable_uninit(&o->ntof);
   1528   upb_inttable_uninit(&o->itof);
   1529   upb_def_uninit(upb_oneofdef_upcast_mutable(o));
   1530   free(o);
   1531 }
   1532 
   1533 upb_oneofdef *upb_oneofdef_new(const void *owner) {
   1534   static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof};
   1535   upb_oneofdef *o = malloc(sizeof(*o));
   1536   o->parent = NULL;
   1537   if (!o) return NULL;
   1538   if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl,
   1539                     owner))
   1540     goto err2;
   1541   if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2;
   1542   if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1;
   1543   return o;
   1544 
   1545 err1:
   1546   upb_inttable_uninit(&o->itof);
   1547 err2:
   1548   free(o);
   1549   return NULL;
   1550 }
   1551 
   1552 upb_oneofdef *upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) {
   1553   bool ok;
   1554   upb_oneof_iter i;
   1555   upb_oneofdef *newo = upb_oneofdef_new(owner);
   1556   if (!newo) return NULL;
   1557   ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo),
   1558                            upb_def_fullname(upb_oneofdef_upcast(o)), NULL);
   1559   UPB_ASSERT_VAR(ok, ok);
   1560   for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) {
   1561     upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f);
   1562     if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) {
   1563       upb_oneofdef_unref(newo, owner);
   1564       return NULL;
   1565     }
   1566   }
   1567   return newo;
   1568 }
   1569 
   1570 const char *upb_oneofdef_name(const upb_oneofdef *o) {
   1571   return upb_def_fullname(upb_oneofdef_upcast(o));
   1572 }
   1573 
   1574 bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname,
   1575                              upb_status *s) {
   1576   if (upb_oneofdef_containingtype(o)) {
   1577     upb_status_seterrmsg(s, "oneof already added to a message");
   1578     return false;
   1579   }
   1580   return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s);
   1581 }
   1582 
   1583 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
   1584   return o->parent;
   1585 }
   1586 
   1587 int upb_oneofdef_numfields(const upb_oneofdef *o) {
   1588   return upb_strtable_count(&o->ntof);
   1589 }
   1590 
   1591 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f,
   1592                            const void *ref_donor,
   1593                            upb_status *s) {
   1594   assert(!upb_oneofdef_isfrozen(o));
   1595   assert(!o->parent || !upb_msgdef_isfrozen(o->parent));
   1596 
   1597   /* This method is idempotent. Check if |f| is already part of this oneofdef
   1598    * and return immediately if so. */
   1599   if (upb_fielddef_containingoneof(f) == o) {
   1600     return true;
   1601   }
   1602 
   1603   /* The field must have an OPTIONAL label. */
   1604   if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
   1605     upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label");
   1606     return false;
   1607   }
   1608 
   1609   /* Check that no field with this name or number exists already in the oneof.
   1610    * Also check that the field is not already part of a oneof. */
   1611   if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) {
   1612     upb_status_seterrmsg(s, "field name or number were not set");
   1613     return false;
   1614   } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) ||
   1615              upb_oneofdef_ntofz(o, upb_fielddef_name(f))) {
   1616     upb_status_seterrmsg(s, "duplicate field name or number");
   1617     return false;
   1618   } else if (upb_fielddef_containingoneof(f) != NULL) {
   1619     upb_status_seterrmsg(s, "fielddef already belongs to a oneof");
   1620     return false;
   1621   }
   1622 
   1623   /* We allow adding a field to the oneof either if the field is not part of a
   1624    * msgdef, or if it is and we are also part of the same msgdef. */
   1625   if (o->parent == NULL) {
   1626     /* If we're not in a msgdef, the field cannot be either. Otherwise we would
   1627      * need to magically add this oneof to a msgdef to remain consistent, which
   1628      * is surprising behavior. */
   1629     if (upb_fielddef_containingtype(f) != NULL) {
   1630       upb_status_seterrmsg(s, "fielddef already belongs to a message, but "
   1631                               "oneof does not");
   1632       return false;
   1633     }
   1634   } else {
   1635     /* If we're in a msgdef, the user can add fields that either aren't in any
   1636      * msgdef (in which case they're added to our msgdef) or already a part of
   1637      * our msgdef. */
   1638     if (upb_fielddef_containingtype(f) != NULL &&
   1639         upb_fielddef_containingtype(f) != o->parent) {
   1640       upb_status_seterrmsg(s, "fielddef belongs to a different message "
   1641                               "than oneof");
   1642       return false;
   1643     }
   1644   }
   1645 
   1646   /* Commit phase. First add the field to our parent msgdef, if any, because
   1647    * that may fail; then add the field to our own tables. */
   1648 
   1649   if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) {
   1650     if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) {
   1651       return false;
   1652     }
   1653   }
   1654 
   1655   release_containingtype(f);
   1656   f->oneof = o;
   1657   upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f));
   1658   upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f));
   1659   upb_ref2(f, o);
   1660   upb_ref2(o, f);
   1661   if (ref_donor) upb_fielddef_unref(f, ref_donor);
   1662 
   1663   return true;
   1664 }
   1665 
   1666 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
   1667                                       const char *name, size_t length) {
   1668   upb_value val;
   1669   return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
   1670       upb_value_getptr(val) : NULL;
   1671 }
   1672 
   1673 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
   1674   upb_value val;
   1675   return upb_inttable_lookup32(&o->itof, num, &val) ?
   1676       upb_value_getptr(val) : NULL;
   1677 }
   1678 
/* Starts an iteration over the fields of |o| by positioning |iter| at the
 * beginning of the oneof's number->field table. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  upb_inttable_begin(iter, &o->itof);
}
   1682 
/* Advances |iter| to the next entry; forwards to upb_inttable_next(). */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}
   1686 
/* Returns whatever upb_inttable_done() reports for |iter|, i.e. whether the
 * iteration has run past the last entry. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  return upb_inttable_done(iter);
}
   1690 
   1691 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
   1692   return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter));
   1693 }
   1694 
/* Puts |iter| into the "done" state; forwards to upb_inttable_iter_setdone(). */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
   1698 
   1699 
   1700 #include <stdlib.h>
   1701 #include <stdio.h>
   1702 #include <string.h>
   1703 
/* One node of an environment's singly-linked list of cleanup callbacks.
 * Nodes are created by upb_env_addcleanup() and run by upb_env_uninit(). */
typedef struct cleanup_ent {
  upb_cleanup_func *cleanup;  /* Callback to invoke. */
  void *ud;                   /* Argument passed to |cleanup|. */
  struct cleanup_ent *next;   /* Next node, or NULL at the end of the list. */
} cleanup_ent;
   1709 
/* Forward declaration: upb_env_malloc() refers to seeded_alloc before its
 * definition further down in this file. */
static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size);
   1711 
   1712 /* Default allocator **********************************************************/
   1713 
   1714 /* Just use realloc, keeping all allocated blocks in a linked list to destroy at
   1715  * the end. */
   1716 
   1717 typedef struct mem_block {
   1718   /* List is doubly-linked, because in cases where realloc() moves an existing
   1719    * block, we need to be able to remove the old pointer from the list
   1720    * efficiently. */
   1721   struct mem_block *prev, *next;
   1722 #ifndef NDEBUG
   1723   size_t size;  /* Doesn't include mem_block structure. */
   1724 #endif
   1725 } mem_block;
   1726 
/* State of the default allocator: the head of the list of live blocks. */
typedef struct {
  mem_block *head;  /* Most recently inserted block, or NULL if none. */
} default_alloc_ud;
   1730 
   1731 static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) {
   1732   default_alloc_ud *ud = _ud;
   1733   mem_block *from, *block;
   1734   void *ret;
   1735   UPB_UNUSED(oldsize);
   1736 
   1737   from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL;
   1738 
   1739 #ifndef NDEBUG
   1740   if (from) {
   1741     assert(oldsize <= from->size);
   1742   }
   1743 #endif
   1744 
   1745   /* TODO(haberman): we probably need to provide even better alignment here,
   1746    * like 16-byte alignment of the returned data pointer. */
   1747   block = realloc(from, size + sizeof(mem_block));
   1748   if (!block) return NULL;
   1749   ret = (char*)block + sizeof(*block);
   1750 
   1751 #ifndef NDEBUG
   1752   block->size = size;
   1753 #endif
   1754 
   1755   if (from) {
   1756     if (block != from) {
   1757       /* The block was moved, so pointers in next and prev blocks must be
   1758        * updated to its new location. */
   1759       if (block->next) block->next->prev = block;
   1760       if (block->prev) block->prev->next = block;
   1761       if (ud->head == from) ud->head = block;
   1762     }
   1763   } else {
   1764     /* Insert at head of linked list. */
   1765     block->prev = NULL;
   1766     block->next = ud->head;
   1767     if (block->next) block->next->prev = block;
   1768     ud->head = block;
   1769   }
   1770 
   1771   return ret;
   1772 }
   1773 
   1774 static void default_alloc_cleanup(void *_ud) {
   1775   default_alloc_ud *ud = _ud;
   1776   mem_block *block = ud->head;
   1777 
   1778   while (block) {
   1779     void *to_free = block;
   1780     block = block->next;
   1781     free(to_free);
   1782   }
   1783 }
   1784 
   1785 
   1786 /* Standard error functions ***************************************************/
   1787 
/* Error function installed by upb_env_init(): ignores both arguments and
 * always returns false. */
static bool default_err(void *ud, const upb_status *status) {
  UPB_UNUSED(ud);
  UPB_UNUSED(status);
  return false;
}
   1793 
   1794 static bool write_err_to(void *ud, const upb_status *status) {
   1795   upb_status *copy_to = ud;
   1796   upb_status_copy(copy_to, status);
   1797   return false;
   1798 }
   1799 
   1800 
   1801 /* upb_env ********************************************************************/
   1802 
   1803 void upb_env_init(upb_env *e) {
   1804   default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud;
   1805   e->ok_ = true;
   1806   e->bytes_allocated = 0;
   1807   e->cleanup_head = NULL;
   1808 
   1809   ud->head = NULL;
   1810 
   1811   /* Set default functions. */
   1812   upb_env_setallocfunc(e, default_alloc, ud);
   1813   upb_env_seterrorfunc(e, default_err, NULL);
   1814 }
   1815 
   1816 void upb_env_uninit(upb_env *e) {
   1817   cleanup_ent *ent = e->cleanup_head;
   1818 
   1819   while (ent) {
   1820     ent->cleanup(ent->ud);
   1821     ent = ent->next;
   1822   }
   1823 
   1824   /* Must do this after running cleanup functions, because this will delete
   1825      the memory we store our cleanup entries in! */
   1826   if (e->alloc == default_alloc) {
   1827     default_alloc_cleanup(e->alloc_ud);
   1828   }
   1829 }
   1830 
/* Installs |alloc| as the environment's allocation function, with |ud| as the
 * user-data pointer passed to it on every call. */
UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc,
                                          void *ud) {
  e->alloc = alloc;
  e->alloc_ud = ud;
}
   1836 
/* Installs |func| as the environment's error function, with |ud| as the
 * user-data pointer passed to it by upb_env_reporterror(). */
UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func,
                                          void *ud) {
  e->err = func;
  e->err_ud = ud;
}
   1842 
   1843 void upb_env_reporterrorsto(upb_env *e, upb_status *status) {
   1844   e->err = write_err_to;
   1845   e->err_ud = status;
   1846 }
   1847 
/* Returns the environment's ok flag: true after upb_env_init(), false once
 * upb_env_reporterror() has been called. */
bool upb_env_ok(const upb_env *e) {
  return e->ok_;
}
   1851 
/* Marks the environment as failed and hands |status| to the installed error
 * function.  Returns that function's return value. */
bool upb_env_reporterror(upb_env *e, const upb_status *status) {
  e->ok_ = false;
  return e->err(e->err_ud, status);
}
   1856 
   1857 bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) {
   1858   cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent));
   1859   if (!ent) return false;
   1860 
   1861   ent->cleanup = func;
   1862   ent->ud = ud;
   1863   ent->next = e->cleanup_head;
   1864   e->cleanup_head = ent;
   1865 
   1866   return true;
   1867 }
   1868 
   1869 void *upb_env_malloc(upb_env *e, size_t size) {
   1870   e->bytes_allocated += size;
   1871   if (e->alloc == seeded_alloc) {
   1872     /* This is equivalent to the next branch, but allows inlining for a
   1873      * measurable perf benefit. */
   1874     return seeded_alloc(e->alloc_ud, NULL, 0, size);
   1875   } else {
   1876     return e->alloc(e->alloc_ud, NULL, 0, size);
   1877   }
   1878 }
   1879 
   1880 void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) {
   1881   char *ret;
   1882   assert(oldsize <= size);
   1883   ret = e->alloc(e->alloc_ud, ptr, oldsize, size);
   1884 
   1885 #ifndef NDEBUG
   1886   /* Overwrite non-preserved memory to ensure callers are passing the oldsize
   1887    * that they truly require. */
   1888   memset(ret + oldsize, 0xff, size - oldsize);
   1889 #endif
   1890 
   1891   return ret;
   1892 }
   1893 
/* Returns the running total of bytes requested through upb_env_malloc(). */
size_t upb_env_bytesallocated(const upb_env *e) {
  return e->bytes_allocated;
}
   1897 
   1898 
   1899 /* upb_seededalloc ************************************************************/
   1900 
   1901 /* Be conservative and choose 16 in case anyone is using SSE. */
   1902 static const size_t maxalign = 16;
   1903 
   1904 static size_t align_up(size_t size) {
   1905   return ((size + maxalign - 1) / maxalign) * maxalign;
   1906 }
   1907 
   1908 UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize,
   1909                                           size_t size) {
   1910   upb_seededalloc *a = ud;
   1911 
   1912   size = align_up(size);
   1913 
   1914   assert(a->mem_limit >= a->mem_ptr);
   1915 
   1916   if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) {
   1917     /* Fast path: we can satisfy from the initial allocation. */
   1918     void *ret = a->mem_ptr;
   1919     a->mem_ptr += size;
   1920     return ret;
   1921   } else {
   1922     char *chptr = ptr;
   1923     /* Slow path: fallback to other allocator. */
   1924     a->need_cleanup = true;
   1925     /* Is `ptr` part of the user-provided initial block? Don't pass it to the
   1926      * default allocator if so; otherwise, it may try to realloc() the block. */
   1927     if (chptr >= a->mem_base && chptr < a->mem_limit) {
   1928       void *ret;
   1929       assert(chptr + oldsize <= a->mem_limit);
   1930       ret = a->alloc(a->alloc_ud, NULL, 0, size);
   1931       if (ret) memcpy(ret, ptr, oldsize);
   1932       return ret;
   1933     } else {
   1934       return a->alloc(a->alloc_ud, ptr, oldsize, size);
   1935     }
   1936   }
   1937 }
   1938 
   1939 void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) {
   1940   default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud;
   1941   a->mem_base = mem;
   1942   a->mem_ptr = mem;
   1943   a->mem_limit = (char*)mem + len;
   1944   a->need_cleanup = false;
   1945   a->returned_allocfunc = false;
   1946 
   1947   ud->head = NULL;
   1948 
   1949   upb_seededalloc_setfallbackalloc(a, default_alloc, ud);
   1950 }
   1951 
   1952 void upb_seededalloc_uninit(upb_seededalloc *a) {
   1953   if (a->alloc == default_alloc && a->need_cleanup) {
   1954     default_alloc_cleanup(a->alloc_ud);
   1955   }
   1956 }
   1957 
/* Installs |alloc| (with user data |ud|) as the allocator used once the seed
 * buffer cannot satisfy a request.  Must be called before
 * upb_seededalloc_getallocfunc() (asserted). */
UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a,
                                                      upb_alloc_func *alloc,
                                                      void *ud) {
  assert(!a->returned_allocfunc);
  a->alloc = alloc;
  a->alloc_ud = ud;
}
   1965 
/* Returns the allocation function to use with |a| (always seeded_alloc) and
 * records that it has been handed out, after which the fallback allocator may
 * no longer be changed. */
upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) {
  a->returned_allocfunc = true;
  return seeded_alloc;
}
   1970 /*
   1971 ** TODO(haberman): it's unclear whether a lot of the consistency checks should
   1972 ** assert() or return false.
   1973 */
   1974 
   1975 
   1976 #include <stdlib.h>
   1977 #include <string.h>
   1978 
   1979 
   1980 
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE.  Nothing in this part of the file reads or writes the
 * variable's contents. */
char _upb_noclosure;
   1984 
   1985 static void freehandlers(upb_refcounted *r) {
   1986   upb_handlers *h = (upb_handlers*)r;
   1987 
   1988   upb_inttable_iter i;
   1989   upb_inttable_begin(&i, &h->cleanup_);
   1990   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   1991     void *val = (void*)upb_inttable_iter_key(&i);
   1992     upb_value func_val = upb_inttable_iter_value(&i);
   1993     upb_handlerfree *func = upb_value_getfptr(func_val);
   1994     func(val);
   1995   }
   1996 
   1997   upb_inttable_uninit(&h->cleanup_);
   1998   upb_msgdef_unref(h->msg, h);
   1999   free(h->sub);
   2000   free(h);
   2001 }
   2002 
   2003 static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit,
   2004                           void *closure) {
   2005   const upb_handlers *h = (const upb_handlers*)r;
   2006   upb_msg_field_iter i;
   2007   for(upb_msg_field_begin(&i, h->msg);
   2008       !upb_msg_field_done(&i);
   2009       upb_msg_field_next(&i)) {
   2010     upb_fielddef *f = upb_msg_iter_field(&i);
   2011     const upb_handlers *sub;
   2012     if (!upb_fielddef_issubmsg(f)) continue;
   2013     sub = upb_handlers_getsubhandlers(h, f);
   2014     if (sub) visit(r, upb_handlers_upcast(sub), closure);
   2015   }
   2016 }
   2017 
   2018 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers};
   2019 
/* State threaded through the recursive newformsg() walk. */
typedef struct {
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  upb_handlers_callback *callback;  /* Called once per created handlers. */
  const void *closure;              /* First argument passed to |callback|. */
} dfs_state;
   2025 
   2026 /* TODO(haberman): discard upb_handlers* objects that do not actually have any
   2027  * handlers set and cannot reach any upb_handlers* object that does.  This is
   2028  * slightly tricky to do correctly. */
   2029 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner,
   2030                                dfs_state *s) {
   2031   upb_msg_field_iter i;
   2032   upb_handlers *h = upb_handlers_new(m, owner);
   2033   if (!h) return NULL;
   2034   if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom;
   2035 
   2036   s->callback(s->closure, h);
   2037 
   2038   /* For each submessage field, get or create a handlers object and set it as
   2039    * the subhandlers. */
   2040   for(upb_msg_field_begin(&i, m);
   2041       !upb_msg_field_done(&i);
   2042       upb_msg_field_next(&i)) {
   2043     upb_fielddef *f = upb_msg_iter_field(&i);
   2044     const upb_msgdef *subdef;
   2045     upb_value subm_ent;
   2046 
   2047     if (!upb_fielddef_issubmsg(f)) continue;
   2048 
   2049     subdef = upb_downcast_msgdef(upb_fielddef_subdef(f));
   2050     if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) {
   2051       upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent));
   2052     } else {
   2053       upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s);
   2054       if (!sub_mh) goto oom;
   2055       upb_handlers_setsubhandlers(h, f, sub_mh);
   2056       upb_handlers_unref(sub_mh, &sub_mh);
   2057     }
   2058   }
   2059   return h;
   2060 
   2061 oom:
   2062   upb_handlers_unref(h, owner);
   2063   return NULL;
   2064 }
   2065 
   2066 /* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
   2067  * subhandlers for this submessage field. */
   2068 #define SUBH(h, selector) (h->sub[selector])
   2069 
   2070 /* The selector for a submessage field is the field index. */
   2071 #define SUBH_F(h, f) SUBH(h, f->index_)
   2072 
   2073 static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
   2074                          upb_handlertype_t type) {
   2075   upb_selector_t sel;
   2076   assert(!upb_handlers_isfrozen(h));
   2077   if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) {
   2078     upb_status_seterrf(
   2079         &h->status_, "type mismatch: field %s does not belong to message %s",
   2080         upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h)));
   2081     return -1;
   2082   }
   2083   if (!upb_handlers_getselector(f, type, &sel)) {
   2084     upb_status_seterrf(
   2085         &h->status_,
   2086         "type mismatch: cannot register handler type %d for field %s",
   2087         type, upb_fielddef_name(f));
   2088     return -1;
   2089   }
   2090   return sel;
   2091 }
   2092 
/* Like trygetsel(), but for callers that know the field/type combination is
 * valid: failure is treated as a programming error and asserted against. */
static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
                             upb_handlertype_t type) {
  int32_t sel = trygetsel(h, f, type);
  assert(sel >= 0);
  return sel;
}
   2099 
   2100 static const void **returntype(upb_handlers *h, const upb_fielddef *f,
   2101                                upb_handlertype_t type) {
   2102   return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_;
   2103 }
   2104 
   2105 static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
   2106                   upb_handlertype_t type, upb_func *func,
   2107                   upb_handlerattr *attr) {
   2108   upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER;
   2109   const void *closure_type;
   2110   const void **context_closure_type;
   2111 
   2112   assert(!upb_handlers_isfrozen(h));
   2113 
   2114   if (sel < 0) {
   2115     upb_status_seterrmsg(&h->status_,
   2116                          "incorrect handler type for this field.");
   2117     return false;
   2118   }
   2119 
   2120   if (h->table[sel].func) {
   2121     upb_status_seterrmsg(&h->status_,
   2122                          "cannot change handler once it has been set.");
   2123     return false;
   2124   }
   2125 
   2126   if (attr) {
   2127     set_attr = *attr;
   2128   }
   2129 
   2130   /* Check that the given closure type matches the closure type that has been
   2131    * established for this context (if any). */
   2132   closure_type = upb_handlerattr_closuretype(&set_attr);
   2133 
   2134   if (type == UPB_HANDLER_STRING) {
   2135     context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
   2136   } else if (f && upb_fielddef_isseq(f) &&
   2137              type != UPB_HANDLER_STARTSEQ &&
   2138              type != UPB_HANDLER_ENDSEQ) {
   2139     context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
   2140   } else {
   2141     context_closure_type = &h->top_closure_type;
   2142   }
   2143 
   2144   if (closure_type && *context_closure_type &&
   2145       closure_type != *context_closure_type) {
   2146     /* TODO(haberman): better message for debugging. */
   2147     if (f) {
   2148       upb_status_seterrf(&h->status_,
   2149                          "closure type does not match for field %s",
   2150                          upb_fielddef_name(f));
   2151     } else {
   2152       upb_status_seterrmsg(
   2153           &h->status_, "closure type does not match for message-level handler");
   2154     }
   2155     return false;
   2156   }
   2157 
   2158   if (closure_type)
   2159     *context_closure_type = closure_type;
   2160 
   2161   /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
   2162    * matches any pre-existing expectations about what type is expected. */
   2163   if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
   2164     const void *return_type = upb_handlerattr_returnclosuretype(&set_attr);
   2165     const void *table_return_type =
   2166         upb_handlerattr_returnclosuretype(&h->table[sel].attr);
   2167     if (return_type && table_return_type && return_type != table_return_type) {
   2168       upb_status_seterrmsg(&h->status_, "closure return type does not match");
   2169       return false;
   2170     }
   2171 
   2172     if (table_return_type && !return_type)
   2173       upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type);
   2174   }
   2175 
   2176   h->table[sel].func = (upb_func*)func;
   2177   h->table[sel].attr = set_attr;
   2178   return true;
   2179 }
   2180 
   2181 /* Returns the effective closure type for this handler (which will propagate
   2182  * from outer frames if this frame has no START* handler).  Not implemented for
   2183  * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
   2184  * the effective closure type is unspecified (either no handler was registered
   2185  * to specify it or the handler that was registered did not specify the closure
   2186  * type). */
   2187 const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
   2188                                    upb_handlertype_t type) {
   2189   const void *ret;
   2190   upb_selector_t sel;
   2191 
   2192   assert(type != UPB_HANDLER_STRING);
   2193   ret = h->top_closure_type;
   2194 
   2195   if (upb_fielddef_isseq(f) &&
   2196       type != UPB_HANDLER_STARTSEQ &&
   2197       type != UPB_HANDLER_ENDSEQ &&
   2198       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
   2199     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
   2200   }
   2201 
   2202   if (type == UPB_HANDLER_STRING &&
   2203       h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
   2204     ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
   2205   }
   2206 
   2207   /* The effective type of the submessage; not used yet.
   2208    * if (type == SUBMESSAGE &&
   2209    *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
   2210    *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
   2211    * } */
   2212 
   2213   return ret;
   2214 }
   2215 
   2216 /* Checks whether the START* handler specified by f & type is missing even
   2217  * though it is required to convert the established type of an outer frame
   2218  * ("closure_type") into the established type of an inner frame (represented in
   2219  * the return closure type of this handler's attr. */
   2220 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
   2221                 upb_status *status) {
   2222   const void *closure_type;
   2223   const upb_handlerattr *attr;
   2224   const void *return_closure_type;
   2225 
   2226   upb_selector_t sel = handlers_getsel(h, f, type);
   2227   if (h->table[sel].func) return true;
   2228   closure_type = effective_closure_type(h, f, type);
   2229   attr = &h->table[sel].attr;
   2230   return_closure_type = upb_handlerattr_returnclosuretype(attr);
   2231   if (closure_type && return_closure_type &&
   2232       closure_type != return_closure_type) {
   2233     upb_status_seterrf(status,
   2234                        "expected start handler to return sub type for field %f",
   2235                        upb_fielddef_name(f));
   2236     return false;
   2237   }
   2238   return true;
   2239 }
   2240 
   2241 /* Public interface ***********************************************************/
   2242 
   2243 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) {
   2244   int extra;
   2245   upb_handlers *h;
   2246 
   2247   assert(upb_msgdef_isfrozen(md));
   2248 
   2249   extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1);
   2250   h = calloc(sizeof(*h) + extra, 1);
   2251   if (!h) return NULL;
   2252 
   2253   h->msg = md;
   2254   upb_msgdef_ref(h->msg, h);
   2255   upb_status_clear(&h->status_);
   2256   h->sub = calloc(md->submsg_field_count, sizeof(*h->sub));
   2257   if (!h->sub) goto oom;
   2258   if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner))
   2259     goto oom;
   2260   if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom;
   2261 
   2262   /* calloc() above initialized all handlers to NULL. */
   2263   return h;
   2264 
   2265 oom:
   2266   freehandlers(upb_handlers_upcast_mutable(h));
   2267   return NULL;
   2268 }
   2269 
   2270 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m,
   2271                                            const void *owner,
   2272                                            upb_handlers_callback *callback,
   2273                                            const void *closure) {
   2274   dfs_state state;
   2275   upb_handlers *ret;
   2276   bool ok;
   2277   upb_refcounted *r;
   2278 
   2279   state.callback = callback;
   2280   state.closure = closure;
   2281   if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL;
   2282 
   2283   ret = newformsg(m, owner, &state);
   2284 
   2285   upb_inttable_uninit(&state.tab);
   2286   if (!ret) return NULL;
   2287 
   2288   r = upb_handlers_upcast_mutable(ret);
   2289   ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH);
   2290   UPB_ASSERT_VAR(ok, ok);
   2291 
   2292   return ret;
   2293 }
   2294 
/* Returns the status object in which the setters record registration errors.
 * Only valid on handlers that are not yet frozen (asserted). */
const upb_status *upb_handlers_status(upb_handlers *h) {
  assert(!upb_handlers_isfrozen(h));
  return &h->status_;
}
   2299 
/* Clears any error recorded in the handlers' status object.  Only valid on
 * handlers that are not yet frozen (asserted). */
void upb_handlers_clearerr(upb_handlers *h) {
  assert(!upb_handlers_isfrozen(h));
  upb_status_clear(&h->status_);
}
   2304 
   2305 #define SETTER(name, handlerctype, handlertype) \
   2306   bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \
   2307                                 handlerctype func, upb_handlerattr *attr) { \
   2308     int32_t sel = trygetsel(h, f, handlertype); \
   2309     return doset(h, sel, f, handlertype, (upb_func*)func, attr); \
   2310   }
   2311 
   2312 SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
   2313 SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
   2314 SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
   2315 SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
   2316 SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
   2317 SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
   2318 SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
   2319 SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
   2320 SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
   2321 SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
   2322 SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
   2323 SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
   2324 SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
   2325 SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)
   2326 
   2327 #undef SETTER
   2328 
/* Registers the message-level start handler in the fixed
 * UPB_STARTMSG_SELECTOR slot.  No field is involved, so doset() receives a
 * NULL field; the handler type passed is UPB_HANDLER_INT32 (presumably just a
 * placeholder, since doset() only special-cases STRING and the START/END
 * sequence types).  Returns doset()'s result. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              upb_handlerattr *attr) {
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
               (upb_func *)func, attr);
}
   2334 
/* Registers the message-level end handler in the fixed UPB_ENDMSG_SELECTOR
 * slot.  No field is involved, so doset() receives a NULL field; the handler
 * type passed is UPB_HANDLER_INT32 (presumably just a placeholder).  |h| must
 * not be frozen (asserted).  Returns doset()'s result. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            upb_handlerattr *attr) {
  assert(!upb_handlers_isfrozen(h));
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
               (upb_func *)func, attr);
}
   2341 
   2342 bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
   2343                                  const upb_handlers *sub) {
   2344   assert(sub);
   2345   assert(!upb_handlers_isfrozen(h));
   2346   assert(upb_fielddef_issubmsg(f));
   2347   if (SUBH_F(h, f)) return false;  /* Can't reset. */
   2348   if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) {
   2349     return false;
   2350   }
   2351   SUBH_F(h, f) = sub;
   2352   upb_ref2(sub, h);
   2353   return true;
   2354 }
   2355 
/* Returns the subhandlers slot for submessage field |f|, which is NULL if
 * none have been set.  |f| must be a submessage field (asserted). */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  assert(upb_fielddef_issubmsg(f));
  return SUBH_F(h, f);
}
   2361 
   2362 bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
   2363                           upb_handlerattr *attr) {
   2364   if (!upb_handlers_gethandler(h, sel))
   2365     return false;
   2366   *attr = h->table[sel].attr;
   2367   return true;
   2368 }
   2369 
/* Returns the subhandlers slot addressed by a STARTSUBMSG selector.  The
 * index into the subhandlers array is the selector minus
 * UPB_STATIC_SELECTOR_COUNT. */
const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
                                                    upb_selector_t sel) {
  /* STARTSUBMSG selector in sel is the field's selector base. */
  return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
}
   2375 
/* Returns the message definition these handlers were created for. */
const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
   2377 
   2378 bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
   2379   bool ok;
   2380   if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) {
   2381     return false;
   2382   }
   2383   ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func));
   2384   UPB_ASSERT_VAR(ok, ok);
   2385   return true;
   2386 }
   2387 
   2388 
   2389 /* "Static" methods ***********************************************************/
   2390 
   2391 bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) {
   2392   /* TODO: verify we have a transitive closure. */
   2393   int i;
   2394   for (i = 0; i < n; i++) {
   2395     upb_msg_field_iter j;
   2396     upb_handlers *h = handlers[i];
   2397 
   2398     if (!upb_ok(&h->status_)) {
   2399       upb_status_seterrf(s, "handlers for message %s had error status: %s",
   2400                          upb_msgdef_fullname(upb_handlers_msgdef(h)),
   2401                          upb_status_errmsg(&h->status_));
   2402       return false;
   2403     }
   2404 
   2405     /* Check that there are no closure mismatches due to missing Start* handlers
   2406      * or subhandlers with different type-level types. */
   2407     for(upb_msg_field_begin(&j, h->msg);
   2408         !upb_msg_field_done(&j);
   2409         upb_msg_field_next(&j)) {
   2410 
   2411       const upb_fielddef *f = upb_msg_iter_field(&j);
   2412       if (upb_fielddef_isseq(f)) {
   2413         if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s))
   2414           return false;
   2415       }
   2416 
   2417       if (upb_fielddef_isstring(f)) {
   2418         if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s))
   2419           return false;
   2420       }
   2421 
   2422       if (upb_fielddef_issubmsg(f)) {
   2423         bool hashandler = false;
   2424         if (upb_handlers_gethandler(
   2425                 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) ||
   2426             upb_handlers_gethandler(
   2427                 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) {
   2428           hashandler = true;
   2429         }
   2430 
   2431         if (upb_fielddef_isseq(f) &&
   2432             (upb_handlers_gethandler(
   2433                  h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) ||
   2434              upb_handlers_gethandler(
   2435                  h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) {
   2436           hashandler = true;
   2437         }
   2438 
   2439         if (hashandler && !upb_handlers_getsubhandlers(h, f)) {
   2440           /* For now we add an empty subhandlers in this case.  It makes the
   2441            * decoder code generator simpler, because it only has to handle two
   2442            * cases (submessage has handlers or not) as opposed to three
   2443            * (submessage has handlers in enclosing message but no subhandlers).
   2444            *
   2445            * This makes parsing less efficient in the case that we want to
   2446            * notice a submessage but skip its contents (like if we're testing
   2447            * for submessage presence or counting the number of repeated
   2448            * submessages).  In this case we will end up parsing the submessage
   2449            * field by field and throwing away the results for each, instead of
   2450            * skipping the whole delimited thing at once.  If this is an issue we
   2451            * can revisit it, but do remember that this only arises when you have
   2452            * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the
   2453            * submessage but no subhandlers.  The use cases for this are
   2454            * limited. */
   2455           upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub);
   2456           upb_handlers_setsubhandlers(h, f, sub);
   2457           upb_handlers_unref(sub, &sub);
   2458         }
   2459 
   2460         /* TODO(haberman): check type of submessage.
   2461          * This is slightly tricky; also consider whether we should check that
   2462          * they match at setsubhandlers time. */
   2463       }
   2464     }
   2465   }
   2466 
   2467   if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s,
   2468                              UPB_MAX_HANDLER_DEPTH)) {
   2469     return false;
   2470   }
   2471 
   2472   return true;
   2473 }
   2474 
   2475 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
   2476   switch (upb_fielddef_type(f)) {
   2477     case UPB_TYPE_INT32:
   2478     case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
   2479     case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
   2480     case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
   2481     case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
   2482     case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
   2483     case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
   2484     case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
   2485     default: assert(false); return -1;  /* Invalid input. */
   2486   }
   2487 }
   2488 
   2489 bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
   2490                               upb_selector_t *s) {
   2491   switch (type) {
   2492     case UPB_HANDLER_INT32:
   2493     case UPB_HANDLER_INT64:
   2494     case UPB_HANDLER_UINT32:
   2495     case UPB_HANDLER_UINT64:
   2496     case UPB_HANDLER_FLOAT:
   2497     case UPB_HANDLER_DOUBLE:
   2498     case UPB_HANDLER_BOOL:
   2499       if (!upb_fielddef_isprimitive(f) ||
   2500           upb_handlers_getprimitivehandlertype(f) != type)
   2501         return false;
   2502       *s = f->selector_base;
   2503       break;
   2504     case UPB_HANDLER_STRING:
   2505       if (upb_fielddef_isstring(f)) {
   2506         *s = f->selector_base;
   2507       } else if (upb_fielddef_lazy(f)) {
   2508         *s = f->selector_base + 3;
   2509       } else {
   2510         return false;
   2511       }
   2512       break;
   2513     case UPB_HANDLER_STARTSTR:
   2514       if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
   2515         *s = f->selector_base + 1;
   2516       } else {
   2517         return false;
   2518       }
   2519       break;
   2520     case UPB_HANDLER_ENDSTR:
   2521       if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
   2522         *s = f->selector_base + 2;
   2523       } else {
   2524         return false;
   2525       }
   2526       break;
   2527     case UPB_HANDLER_STARTSEQ:
   2528       if (!upb_fielddef_isseq(f)) return false;
   2529       *s = f->selector_base - 2;
   2530       break;
   2531     case UPB_HANDLER_ENDSEQ:
   2532       if (!upb_fielddef_isseq(f)) return false;
   2533       *s = f->selector_base - 1;
   2534       break;
   2535     case UPB_HANDLER_STARTSUBMSG:
   2536       if (!upb_fielddef_issubmsg(f)) return false;
   2537       /* Selectors for STARTSUBMSG are at the beginning of the table so that the
   2538        * selector can also be used as an index into the "sub" array of
   2539        * subhandlers.  The indexes for the two into these two tables are the
   2540        * same, except that in the handler table the static selectors come first. */
   2541       *s = f->index_ + UPB_STATIC_SELECTOR_COUNT;
   2542       break;
   2543     case UPB_HANDLER_ENDSUBMSG:
   2544       if (!upb_fielddef_issubmsg(f)) return false;
   2545       *s = f->selector_base;
   2546       break;
   2547   }
   2548   assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count);
   2549   return true;
   2550 }
   2551 
   2552 uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
   2553   return upb_fielddef_isseq(f) ? 2 : 0;
   2554 }
   2555 
   2556 uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
   2557   uint32_t ret = 1;
   2558   if (upb_fielddef_isseq(f)) ret += 2;    /* STARTSEQ/ENDSEQ */
   2559   if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
   2560   if (upb_fielddef_issubmsg(f)) {
   2561     /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
   2562     ret += 0;
   2563     if (upb_fielddef_lazy(f)) {
   2564       /* STARTSTR/ENDSTR/STRING (for lazy) */
   2565       ret += 3;
   2566     }
   2567   }
   2568   return ret;
   2569 }
   2570 
   2571 
   2572 /* upb_handlerattr ************************************************************/
   2573 
   2574 void upb_handlerattr_init(upb_handlerattr *attr) {
   2575   upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER;
   2576   memcpy(attr, &from, sizeof(*attr));
   2577 }
   2578 
   2579 void upb_handlerattr_uninit(upb_handlerattr *attr) {
   2580   UPB_UNUSED(attr);
   2581 }
   2582 
   2583 bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) {
   2584   attr->handler_data_ = hd;
   2585   return true;
   2586 }
   2587 
   2588 bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) {
   2589   attr->closure_type_ = type;
   2590   return true;
   2591 }
   2592 
   2593 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) {
   2594   return attr->closure_type_;
   2595 }
   2596 
   2597 bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr,
   2598                                           const void *type) {
   2599   attr->return_closure_type_ = type;
   2600   return true;
   2601 }
   2602 
   2603 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) {
   2604   return attr->return_closure_type_;
   2605 }
   2606 
   2607 bool upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) {
   2608   attr->alwaysok_ = alwaysok;
   2609   return true;
   2610 }
   2611 
   2612 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) {
   2613   return attr->alwaysok_;
   2614 }
   2615 
   2616 /* upb_bufhandle **************************************************************/
   2617 
   2618 size_t upb_bufhandle_objofs(const upb_bufhandle *h) {
   2619   return h->objofs_;
   2620 }
   2621 
   2622 /* upb_byteshandler ***********************************************************/
   2623 
   2624 void upb_byteshandler_init(upb_byteshandler* h) {
   2625   memset(h, 0, sizeof(*h));
   2626 }
   2627 
   2628 /* For when we support handlerfree callbacks. */
   2629 void upb_byteshandler_uninit(upb_byteshandler* h) {
   2630   UPB_UNUSED(h);
   2631 }
   2632 
   2633 bool upb_byteshandler_setstartstr(upb_byteshandler *h,
   2634                                   upb_startstr_handlerfunc *func, void *d) {
   2635   h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
   2636   h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d;
   2637   return true;
   2638 }
   2639 
   2640 bool upb_byteshandler_setstring(upb_byteshandler *h,
   2641                                 upb_string_handlerfunc *func, void *d) {
   2642   h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
   2643   h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d;
   2644   return true;
   2645 }
   2646 
   2647 bool upb_byteshandler_setendstr(upb_byteshandler *h,
   2648                                 upb_endfield_handlerfunc *func, void *d) {
   2649   h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
   2650   h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d;
   2651   return true;
   2652 }
   2653 /*
   2654 ** upb::RefCounted Implementation
   2655 **
   2656 ** Our key invariants are:
   2657 ** 1. reference cycles never span groups
   2658 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to)
   2659 **
   2660 ** The previous two are how we avoid leaking cycles.  Other important
   2661 ** invariants are:
   2662 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from)
   2663 **    this implies group(from) == group(to).  (In practice, what we implement
   2664 **    is even stronger; "from" and "to" will share a group if there has *ever*
   2665 **    been a ref2(to, from), but all that is necessary for correctness is the
   2666 **    weaker one).
   2667 ** 4. mutable and immutable objects are never in the same group.
   2668 */
   2669 
   2670 
   2671 #include <setjmp.h>
   2672 #include <stdlib.h>
   2673 
   2674 static void freeobj(upb_refcounted *o);
   2675 
   2676 const char untracked_val;
   2677 const void *UPB_UNTRACKED_REF = &untracked_val;
   2678 
   2679 /* arch-specific atomic primitives  *******************************************/
   2680 
   2681 #ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/
   2682 
   2683 static void atomic_inc(uint32_t *a) { (*a)++; }
   2684 static bool atomic_dec(uint32_t *a) { return --(*a) == 0; }
   2685 
   2686 #elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/
   2687 
   2688 static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); }
   2689 static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; }
   2690 
   2691 #elif defined(WIN32) /*-------------------------------------------------------*/
   2692 
   2693 #include <Windows.h>
   2694 
   2695 static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); }
   2696 static bool atomic_dec(upb_atomic_t *a) {
   2697   return InterlockedDecrement(&a->val) == 0;
   2698 }
   2699 
   2700 #else
   2701 #error Atomic primitives not defined for your platform/CPU.  \
   2702        Implement them or compile with UPB_THREAD_UNSAFE.
   2703 #endif
   2704 
   2705 /* All static objects point to this refcount.
   2706  * It is special-cased in ref/unref below.  */
   2707 uint32_t static_refcount = -1;
   2708 
   2709 /* We can avoid atomic ops for statically-declared objects.
   2710  * This is a minor optimization but nice since we can avoid degrading under
   2711  * contention in this case. */
   2712 
   2713 static void refgroup(uint32_t *group) {
   2714   if (group != &static_refcount)
   2715     atomic_inc(group);
   2716 }
   2717 
   2718 static bool unrefgroup(uint32_t *group) {
   2719   if (group == &static_refcount) {
   2720     return false;
   2721   } else {
   2722     return atomic_dec(group);
   2723   }
   2724 }
   2725 
   2726 
   2727 /* Reference tracking (debug only) ********************************************/
   2728 
   2729 #ifdef UPB_DEBUG_REFS
   2730 
   2731 #ifdef UPB_THREAD_UNSAFE
   2732 
   2733 static void upb_lock() {}
   2734 static void upb_unlock() {}
   2735 
   2736 #else
   2737 
   2738 /* User must define functions that lock/unlock a global mutex and link this
   2739  * file against them. */
   2740 void upb_lock();
   2741 void upb_unlock();
   2742 
   2743 #endif
   2744 
   2745 /* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some
   2746  * code-paths that can normally never fail, like upb_refcounted_ref().  Since
   2747  * we have no way to propagage out-of-memory errors back to the user, and since
   2748  * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. */
   2749 #define CHECK_OOM(predicate) if (!(predicate)) { assert(predicate); exit(1); }
   2750 
   2751 typedef struct {
   2752   int count;  /* How many refs there are (duplicates only allowed for ref2). */
   2753   bool is_ref2;
   2754 } trackedref;
   2755 
   2756 static trackedref *trackedref_new(bool is_ref2) {
   2757   trackedref *ret = malloc(sizeof(*ret));
   2758   CHECK_OOM(ret);
   2759   ret->count = 1;
   2760   ret->is_ref2 = is_ref2;
   2761   return ret;
   2762 }
   2763 
   2764 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
   2765   upb_value v;
   2766 
   2767   assert(owner);
   2768   if (owner == UPB_UNTRACKED_REF) return;
   2769 
   2770   upb_lock();
   2771   if (upb_inttable_lookupptr(r->refs, owner, &v)) {
   2772     trackedref *ref = upb_value_getptr(v);
   2773     /* Since we allow multiple ref2's for the same to/from pair without
   2774      * allocating separate memory for each one, we lose the fine-grained
   2775      * tracking behavior we get with regular refs.  Since ref2s only happen
   2776      * inside upb, we'll accept this limitation until/unless there is a really
   2777      * difficult upb-internal bug that can't be figured out without it. */
   2778     assert(ref2);
   2779     assert(ref->is_ref2);
   2780     ref->count++;
   2781   } else {
   2782     trackedref *ref = trackedref_new(ref2);
   2783     bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref));
   2784     CHECK_OOM(ok);
   2785     if (ref2) {
   2786       /* We know this cast is safe when it is a ref2, because it's coming from
   2787        * another refcounted object. */
   2788       const upb_refcounted *from = owner;
   2789       assert(!upb_inttable_lookupptr(from->ref2s, r, NULL));
   2790       ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL));
   2791       CHECK_OOM(ok);
   2792     }
   2793   }
   2794   upb_unlock();
   2795 }
   2796 
   2797 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
   2798   upb_value v;
   2799   bool found;
   2800   trackedref *ref;
   2801 
   2802   assert(owner);
   2803   if (owner == UPB_UNTRACKED_REF) return;
   2804 
   2805   upb_lock();
   2806   found = upb_inttable_lookupptr(r->refs, owner, &v);
   2807   /* This assert will fail if an owner attempts to release a ref it didn't have. */
   2808   UPB_ASSERT_VAR(found, found);
   2809   ref = upb_value_getptr(v);
   2810   assert(ref->is_ref2 == ref2);
   2811   if (--ref->count == 0) {
   2812     free(ref);
   2813     upb_inttable_removeptr(r->refs, owner, NULL);
   2814     if (ref2) {
   2815       /* We know this cast is safe when it is a ref2, because it's coming from
   2816        * another refcounted object. */
   2817       const upb_refcounted *from = owner;
   2818       bool removed = upb_inttable_removeptr(from->ref2s, r, NULL);
   2819       assert(removed);
   2820     }
   2821   }
   2822   upb_unlock();
   2823 }
   2824 
   2825 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
   2826   upb_value v;
   2827   bool found;
   2828   trackedref *ref;
   2829 
   2830   upb_lock();
   2831   found = upb_inttable_lookupptr(r->refs, owner, &v);
   2832   UPB_ASSERT_VAR(found, found);
   2833   ref = upb_value_getptr(v);
   2834   assert(ref->is_ref2 == ref2);
   2835   upb_unlock();
   2836 }
   2837 
   2838 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that
   2839  * originate from the given owner. */
   2840 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) {
   2841   upb_inttable_iter i;
   2842 
   2843   upb_lock();
   2844   upb_inttable_begin(&i, owner->ref2s);
   2845   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   2846     upb_value v;
   2847     upb_value count;
   2848     trackedref *ref;
   2849     bool ok;
   2850     bool found;
   2851 
   2852     upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i);
   2853 
   2854     /* To get the count we need to look in the target's table. */
   2855     found = upb_inttable_lookupptr(to->refs, owner, &v);
   2856     assert(found);
   2857     ref = upb_value_getptr(v);
   2858     count = upb_value_int32(ref->count);
   2859 
   2860     ok = upb_inttable_insertptr(tab, to, count);
   2861     CHECK_OOM(ok);
   2862   }
   2863   upb_unlock();
   2864 }
   2865 
   2866 typedef struct {
   2867   upb_inttable ref2;
   2868   const upb_refcounted *obj;
   2869 } check_state;
   2870 
   2871 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj,
   2872                         void *closure) {
   2873   check_state *s = closure;
   2874   upb_inttable *ref2 = &s->ref2;
   2875   upb_value v;
   2876   bool removed;
   2877   int32_t newcount;
   2878 
   2879   assert(obj == s->obj);
   2880   assert(subobj);
   2881   removed = upb_inttable_removeptr(ref2, subobj, &v);
   2882   /* The following assertion will fail if the visit() function visits a subobj
   2883    * that it did not have a ref2 on, or visits the same subobj too many times. */
   2884   assert(removed);
   2885   newcount = upb_value_getint32(v) - 1;
   2886   if (newcount > 0) {
   2887     upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount));
   2888   }
   2889 }
   2890 
   2891 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
   2892                   void *closure) {
   2893   bool ok;
   2894 
   2895   /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know
   2896    * exactly the set of nodes that visit() should visit.  So we verify visit()'s
   2897    * correctness here. */
   2898   check_state state;
   2899   state.obj = r;
   2900   ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32);
   2901   CHECK_OOM(ok);
   2902   getref2s(r, &state.ref2);
   2903 
   2904   /* This should visit any children in the ref2 table. */
   2905   if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state);
   2906 
   2907   /* This assertion will fail if the visit() function missed any children. */
   2908   assert(upb_inttable_count(&state.ref2) == 0);
   2909   upb_inttable_uninit(&state.ref2);
   2910   if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
   2911 }
   2912 
   2913 static bool trackinit(upb_refcounted *r) {
   2914   r->refs = malloc(sizeof(*r->refs));
   2915   r->ref2s = malloc(sizeof(*r->ref2s));
   2916   if (!r->refs || !r->ref2s) goto err1;
   2917 
   2918   if (!upb_inttable_init(r->refs, UPB_CTYPE_PTR)) goto err1;
   2919   if (!upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) goto err2;
   2920   return true;
   2921 
   2922 err2:
   2923   upb_inttable_uninit(r->refs);
   2924 err1:
   2925   free(r->refs);
   2926   free(r->ref2s);
   2927   return false;
   2928 }
   2929 
   2930 static void trackfree(const upb_refcounted *r) {
   2931   upb_inttable_uninit(r->refs);
   2932   upb_inttable_uninit(r->ref2s);
   2933   free(r->refs);
   2934   free(r->ref2s);
   2935 }
   2936 
   2937 #else
   2938 
   2939 static void track(const upb_refcounted *r, const void *owner, bool ref2) {
   2940   UPB_UNUSED(r);
   2941   UPB_UNUSED(owner);
   2942   UPB_UNUSED(ref2);
   2943 }
   2944 
   2945 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) {
   2946   UPB_UNUSED(r);
   2947   UPB_UNUSED(owner);
   2948   UPB_UNUSED(ref2);
   2949 }
   2950 
   2951 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) {
   2952   UPB_UNUSED(r);
   2953   UPB_UNUSED(owner);
   2954   UPB_UNUSED(ref2);
   2955 }
   2956 
   2957 static bool trackinit(upb_refcounted *r) {
   2958   UPB_UNUSED(r);
   2959   return true;
   2960 }
   2961 
   2962 static void trackfree(const upb_refcounted *r) {
   2963   UPB_UNUSED(r);
   2964 }
   2965 
   2966 static void visit(const upb_refcounted *r, upb_refcounted_visit *v,
   2967                   void *closure) {
   2968   if (r->vtbl->visit) r->vtbl->visit(r, v, closure);
   2969 }
   2970 
   2971 #endif  /* UPB_DEBUG_REFS */
   2972 
   2973 
   2974 /* freeze() *******************************************************************/
   2975 
   2976 /* The freeze() operation is by far the most complicated part of this scheme.
   2977  * We compute strongly-connected components and then mutate the graph such that
   2978  * we preserve the invariants documented at the top of this file.  And we must
   2979  * handle out-of-memory errors gracefully (without leaving the graph
   2980  * inconsistent), which adds to the fun. */
   2981 
   2982 /* The state used by the freeze operation (shared across many functions). */
   2983 typedef struct {
   2984   int depth;
   2985   int maxdepth;
   2986   uint64_t index;
   2987   /* Maps upb_refcounted* -> attributes (color, etc).  attr layout varies by
   2988    * color. */
   2989   upb_inttable objattr;
   2990   upb_inttable stack;   /* stack of upb_refcounted* for Tarjan's algorithm. */
   2991   upb_inttable groups;  /* array of uint32_t*, malloc'd refcounts for new groups */
   2992   upb_status *status;
   2993   jmp_buf err;
   2994 } tarjan;
   2995 
   2996 static void release_ref2(const upb_refcounted *obj,
   2997                          const upb_refcounted *subobj,
   2998                          void *closure);
   2999 
   3000 /* Node attributes -----------------------------------------------------------*/
   3001 
   3002 /* After our analysis phase all nodes will be either GRAY or WHITE. */
   3003 
   3004 typedef enum {
   3005   BLACK = 0,  /* Object has not been seen. */
   3006   GRAY,   /* Object has been found via a refgroup but may not be reachable. */
   3007   GREEN,  /* Object is reachable and is currently on the Tarjan stack. */
   3008   WHITE   /* Object is reachable and has been assigned a group (SCC). */
   3009 } color_t;
   3010 
   3011 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); }
   3012 UPB_NORETURN static void oom(tarjan *t) {
   3013   upb_status_seterrmsg(t->status, "out of memory");
   3014   err(t);
   3015 }
   3016 
   3017 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) {
   3018   upb_value v;
   3019   return upb_inttable_lookupptr(&t->objattr, r, &v) ?
   3020       upb_value_getuint64(v) : 0;
   3021 }
   3022 
   3023 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) {
   3024   upb_value v;
   3025   bool found = upb_inttable_lookupptr(&t->objattr, r, &v);
   3026   UPB_ASSERT_VAR(found, found);
   3027   return upb_value_getuint64(v);
   3028 }
   3029 
   3030 static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) {
   3031   upb_inttable_removeptr(&t->objattr, r, NULL);
   3032   upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr));
   3033 }
   3034 
   3035 static color_t color(tarjan *t, const upb_refcounted *r) {
   3036   return trygetattr(t, r) & 0x3;  /* Color is always stored in the low 2 bits. */
   3037 }
   3038 
   3039 static void set_gray(tarjan *t, const upb_refcounted *r) {
   3040   assert(color(t, r) == BLACK);
   3041   setattr(t, r, GRAY);
   3042 }
   3043 
   3044 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */
   3045 static void push(tarjan *t, const upb_refcounted *r) {
   3046   assert(color(t, r) == BLACK || color(t, r) == GRAY);
   3047   /* This defines the attr layout for the GREEN state.  "index" and "lowlink"
   3048    * get 31 bits, which is plenty (limit of 2B objects frozen at a time). */
   3049   setattr(t, r, GREEN | (t->index << 2) | (t->index << 33));
   3050   if (++t->index == 0x80000000) {
   3051     upb_status_seterrmsg(t->status, "too many objects to freeze");
   3052     err(t);
   3053   }
   3054   upb_inttable_push(&t->stack, upb_value_ptr((void*)r));
   3055 }
   3056 
   3057 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its
   3058  * SCC group. */
   3059 static upb_refcounted *pop(tarjan *t) {
   3060   upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack));
   3061   assert(color(t, r) == GREEN);
   3062   /* This defines the attr layout for nodes in the WHITE state.
   3063    * Top of group stack is [group, NULL]; we point at group. */
   3064   setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8);
   3065   return r;
   3066 }
   3067 
   3068 static void tarjan_newgroup(tarjan *t) {
   3069   uint32_t *group = malloc(sizeof(*group));
   3070   if (!group) oom(t);
   3071   /* Push group and empty group leader (we'll fill in leader later). */
   3072   if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) ||
   3073       !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) {
   3074     free(group);
   3075     oom(t);
   3076   }
   3077   *group = 0;
   3078 }
   3079 
   3080 static uint32_t idx(tarjan *t, const upb_refcounted *r) {
   3081   assert(color(t, r) == GREEN);
   3082   return (getattr(t, r) >> 2) & 0x7FFFFFFF;
   3083 }
   3084 
   3085 static uint32_t lowlink(tarjan *t, const upb_refcounted *r) {
   3086   if (color(t, r) == GREEN) {
   3087     return getattr(t, r) >> 33;
   3088   } else {
   3089     return UINT32_MAX;
   3090   }
   3091 }
   3092 
   3093 static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) {
   3094   assert(color(t, r) == GREEN);
   3095   setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF));
   3096 }
   3097 
   3098 static uint32_t *group(tarjan *t, upb_refcounted *r) {
   3099   uint64_t groupnum;
   3100   upb_value v;
   3101   bool found;
   3102 
   3103   assert(color(t, r) == WHITE);
   3104   groupnum = getattr(t, r) >> 8;
   3105   found = upb_inttable_lookup(&t->groups, groupnum, &v);
   3106   UPB_ASSERT_VAR(found, found);
   3107   return upb_value_getptr(v);
   3108 }
   3109 
   3110 /* If the group leader for this object's group has not previously been set,
   3111  * the given object is assigned to be its leader. */
   3112 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) {
   3113   uint64_t leader_slot;
   3114   upb_value v;
   3115   bool found;
   3116 
   3117   assert(color(t, r) == WHITE);
   3118   leader_slot = (getattr(t, r) >> 8) + 1;
   3119   found = upb_inttable_lookup(&t->groups, leader_slot, &v);
   3120   UPB_ASSERT_VAR(found, found);
   3121   if (upb_value_getptr(v)) {
   3122     return upb_value_getptr(v);
   3123   } else {
   3124     upb_inttable_remove(&t->groups, leader_slot, NULL);
   3125     upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r));
   3126     return r;
   3127   }
   3128 }
   3129 
   3130 
   3131 /* Tarjan's algorithm --------------------------------------------------------*/
   3132 
   3133 /* See:
   3134  *   http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */
   3135 static void do_tarjan(const upb_refcounted *obj, tarjan *t);
   3136 
   3137 static void tarjan_visit(const upb_refcounted *obj,
   3138                          const upb_refcounted *subobj,
   3139                          void *closure) {
   3140   tarjan *t = closure;
   3141   if (++t->depth > t->maxdepth) {
   3142     upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth);
   3143     err(t);
   3144   } else if (subobj->is_frozen || color(t, subobj) == WHITE) {
   3145     /* Do nothing: we don't want to visit or color already-frozen nodes,
   3146      * and WHITE nodes have already been assigned a SCC. */
   3147   } else if (color(t, subobj) < GREEN) {
   3148     /* Subdef has not yet been visited; recurse on it. */
   3149     do_tarjan(subobj, t);
   3150     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj)));
   3151   } else if (color(t, subobj) == GREEN) {
   3152     /* Subdef is in the stack and hence in the current SCC. */
   3153     set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj)));
   3154   }
   3155   --t->depth;
   3156 }
   3157 
   3158 static void do_tarjan(const upb_refcounted *obj, tarjan *t) {
   3159   if (color(t, obj) == BLACK) {
   3160     /* We haven't seen this object's group; mark the whole group GRAY. */
   3161     const upb_refcounted *o = obj;
   3162     do { set_gray(t, o); } while ((o = o->next) != obj);
   3163   }
   3164 
   3165   push(t, obj);
   3166   visit(obj, tarjan_visit, t);
   3167   if (lowlink(t, obj) == idx(t, obj)) {
   3168     tarjan_newgroup(t);
   3169     while (pop(t) != obj)
   3170       ;
   3171   }
   3172 }
   3173 
   3174 
   3175 /* freeze() ------------------------------------------------------------------*/
   3176 
   3177 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj,
   3178                      void *_t) {
   3179   tarjan *t = _t;
   3180   assert(color(t, r) > BLACK);
   3181   if (color(t, subobj) > BLACK && r->group != subobj->group) {
   3182     /* Previously this ref was not reflected in subobj->group because they
   3183      * were in the same group; now that they are split a ref must be taken. */
   3184     refgroup(subobj->group);
   3185   }
   3186 }
   3187 
   3188 static bool freeze(upb_refcounted *const*roots, int n, upb_status *s,
   3189                    int maxdepth) {
   3190   volatile bool ret = false;
   3191   int i;
   3192   upb_inttable_iter iter;
   3193 
   3194   /* We run in two passes so that we can allocate all memory before performing
   3195    * any mutation of the input -- this allows us to leave the input unchanged
   3196    * in the case of memory allocation failure. */
   3197   tarjan t;
   3198   t.index = 0;
   3199   t.depth = 0;
   3200   t.maxdepth = maxdepth;
   3201   t.status = s;
   3202   if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1;
   3203   if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2;
   3204   if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3;
   3205   if (setjmp(t.err) != 0) goto err4;
   3206 
   3207 
   3208   for (i = 0; i < n; i++) {
   3209     if (color(&t, roots[i]) < GREEN) {
   3210       do_tarjan(roots[i], &t);
   3211     }
   3212   }
   3213 
   3214   /* If we've made it this far, no further errors are possible so it's safe to
   3215    * mutate the objects without risk of leaving them in an inconsistent state. */
   3216   ret = true;
   3217 
   3218   /* The transformation that follows requires care.  The preconditions are:
   3219    * - all objects in attr map are WHITE or GRAY, and are in mutable groups
   3220    *   (groups of all mutable objs)
   3221    * - no ref2(to, from) refs have incremented count(to) if both "to" and
   3222    *   "from" are in our attr map (this follows from invariants (2) and (3)) */
   3223 
   3224   /* Pass 1: we remove WHITE objects from their mutable groups, and add them to
   3225    * new groups  according to the SCC's we computed.  These new groups will
   3226    * consist of only frozen objects.  None will be immediately collectible,
   3227    * because WHITE objects are by definition reachable from one of "roots",
   3228    * which the caller must own refs on. */
   3229   upb_inttable_begin(&iter, &t.objattr);
   3230   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
   3231     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
   3232     /* Since removal from a singly-linked list requires access to the object's
   3233      * predecessor, we consider obj->next instead of obj for moving.  With the
   3234      * while() loop we guarantee that we will visit every node's predecessor.
   3235      * Proof:
   3236      *  1. every node's predecessor is in our attr map.
   3237      *  2. though the loop body may change a node's predecessor, it will only
   3238      *     change it to be the node we are currently operating on, so with a
   3239      *     while() loop we guarantee ourselves the chance to remove each node. */
   3240     while (color(&t, obj->next) == WHITE &&
   3241            group(&t, obj->next) != obj->next->group) {
   3242       upb_refcounted *leader;
   3243 
   3244       /* Remove from old group. */
   3245       upb_refcounted *move = obj->next;
   3246       if (obj == move) {
   3247         /* Removing the last object from a group. */
   3248         assert(*obj->group == obj->individual_count);
   3249         free(obj->group);
   3250       } else {
   3251         obj->next = move->next;
   3252         /* This may decrease to zero; we'll collect GRAY objects (if any) that
   3253          * remain in the group in the third pass. */
   3254         assert(*move->group >= move->individual_count);
   3255         *move->group -= move->individual_count;
   3256       }
   3257 
   3258       /* Add to new group. */
   3259       leader = groupleader(&t, move);
   3260       if (move == leader) {
   3261         /* First object added to new group is its leader. */
   3262         move->group = group(&t, move);
   3263         move->next = move;
   3264         *move->group = move->individual_count;
   3265       } else {
   3266         /* Group already has at least one object in it. */
   3267         assert(leader->group == group(&t, move));
   3268         move->group = group(&t, move);
   3269         move->next = leader->next;
   3270         leader->next = move;
   3271         *move->group += move->individual_count;
   3272       }
   3273 
   3274       move->is_frozen = true;
   3275     }
   3276   }
   3277 
   3278   /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must
   3279    * increment count(to) if group(obj) != group(to) (which could now be the
   3280    * case if "to" was just frozen). */
   3281   upb_inttable_begin(&iter, &t.objattr);
   3282   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
   3283     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
   3284     visit(obj, crossref, &t);
   3285   }
   3286 
   3287   /* Pass 3: GRAY objects are collected if their group's refcount dropped to
   3288    * zero when we removed its white nodes.  This can happen if they had only
   3289    * been kept alive by virtue of sharing a group with an object that was just
   3290    * frozen.
   3291    *
   3292    * It is important that we do this last, since the GRAY object's free()
   3293    * function could call unref2() on just-frozen objects, which will decrement
   3294    * refs that were added in pass 2. */
   3295   upb_inttable_begin(&iter, &t.objattr);
   3296   for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) {
   3297     upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter);
   3298     if (obj->group == NULL || *obj->group == 0) {
   3299       if (obj->group) {
   3300         upb_refcounted *o;
   3301 
   3302         /* We eagerly free() the group's count (since we can't easily determine
   3303          * the group's remaining size it's the easiest way to ensure it gets
   3304          * done). */
   3305         free(obj->group);
   3306 
   3307         /* Visit to release ref2's (done in a separate pass since release_ref2
   3308          * depends on o->group being unmodified so it can test merged()). */
   3309         o = obj;
   3310         do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj);
   3311 
   3312         /* Mark "group" fields as NULL so we know to free the objects later in
   3313          * this loop, but also don't try to delete the group twice. */
   3314         o = obj;
   3315         do { o->group = NULL; } while ((o = o->next) != obj);
   3316       }
   3317       freeobj(obj);
   3318     }
   3319   }
   3320 
   3321 err4:
   3322   if (!ret) {
   3323     upb_inttable_begin(&iter, &t.groups);
   3324     for(; !upb_inttable_done(&iter); upb_inttable_next(&iter))
   3325       free(upb_value_getptr(upb_inttable_iter_value(&iter)));
   3326   }
   3327   upb_inttable_uninit(&t.groups);
   3328 err3:
   3329   upb_inttable_uninit(&t.stack);
   3330 err2:
   3331   upb_inttable_uninit(&t.objattr);
   3332 err1:
   3333   return ret;
   3334 }
   3335 
   3336 
   3337 /* Misc internal functions  ***************************************************/
   3338 
   3339 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) {
   3340   return r->group == r2->group;
   3341 }
   3342 
   3343 static void merge(upb_refcounted *r, upb_refcounted *from) {
   3344   upb_refcounted *base;
   3345   upb_refcounted *tmp;
   3346 
   3347   if (merged(r, from)) return;
   3348   *r->group += *from->group;
   3349   free(from->group);
   3350   base = from;
   3351 
   3352   /* Set all refcount pointers in the "from" chain to the merged refcount.
   3353    *
   3354    * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound
   3355    * if the user continuously extends a group by one object.  Prevent this by
   3356    * using one of the techniques in this paper:
   3357    *     ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */
   3358   do { from->group = r->group; } while ((from = from->next) != base);
   3359 
   3360   /* Merge the two circularly linked lists by swapping their next pointers. */
   3361   tmp = r->next;
   3362   r->next = base->next;
   3363   base->next = tmp;
   3364 }
   3365 
   3366 static void unref(const upb_refcounted *r);
   3367 
   3368 static void release_ref2(const upb_refcounted *obj,
   3369                          const upb_refcounted *subobj,
   3370                          void *closure) {
   3371   UPB_UNUSED(closure);
   3372   untrack(subobj, obj, true);
   3373   if (!merged(obj, subobj)) {
   3374     assert(subobj->is_frozen);
   3375     unref(subobj);
   3376   }
   3377 }
   3378 
   3379 static void unref(const upb_refcounted *r) {
   3380   if (unrefgroup(r->group)) {
   3381     const upb_refcounted *o;
   3382 
   3383     free(r->group);
   3384 
   3385     /* In two passes, since release_ref2 needs a guarantee that any subobjs
   3386      * are alive. */
   3387     o = r;
   3388     do { visit(o, release_ref2, NULL); } while((o = o->next) != r);
   3389 
   3390     o = r;
   3391     do {
   3392       const upb_refcounted *next = o->next;
   3393       assert(o->is_frozen || o->individual_count == 0);
   3394       freeobj((upb_refcounted*)o);
   3395       o = next;
   3396     } while(o != r);
   3397   }
   3398 }
   3399 
   3400 static void freeobj(upb_refcounted *o) {
   3401   trackfree(o);
   3402   o->vtbl->free((upb_refcounted*)o);
   3403 }
   3404 
   3405 
   3406 /* Public interface ***********************************************************/
   3407 
   3408 bool upb_refcounted_init(upb_refcounted *r,
   3409                          const struct upb_refcounted_vtbl *vtbl,
   3410                          const void *owner) {
   3411 #ifndef NDEBUG
   3412   /* Endianness check.  This is unrelated to upb_refcounted, it's just a
   3413    * convenient place to put the check that we can be assured will run for
   3414    * basically every program using upb. */
   3415   const int x = 1;
   3416 #ifdef UPB_BIG_ENDIAN
   3417   assert(*(char*)&x != 1);
   3418 #else
   3419   assert(*(char*)&x == 1);
   3420 #endif
   3421 #endif
   3422 
   3423   r->next = r;
   3424   r->vtbl = vtbl;
   3425   r->individual_count = 0;
   3426   r->is_frozen = false;
   3427   r->group = malloc(sizeof(*r->group));
   3428   if (!r->group) return false;
   3429   *r->group = 0;
   3430   if (!trackinit(r)) {
   3431     free(r->group);
   3432     return false;
   3433   }
   3434   upb_refcounted_ref(r, owner);
   3435   return true;
   3436 }
   3437 
   3438 bool upb_refcounted_isfrozen(const upb_refcounted *r) {
   3439   return r->is_frozen;
   3440 }
   3441 
   3442 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) {
   3443   track(r, owner, false);
   3444   if (!r->is_frozen)
   3445     ((upb_refcounted*)r)->individual_count++;
   3446   refgroup(r->group);
   3447 }
   3448 
   3449 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) {
   3450   untrack(r, owner, false);
   3451   if (!r->is_frozen)
   3452     ((upb_refcounted*)r)->individual_count--;
   3453   unref(r);
   3454 }
   3455 
   3456 void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) {
   3457   assert(!from->is_frozen);  /* Non-const pointer implies this. */
   3458   track(r, from, true);
   3459   if (r->is_frozen) {
   3460     refgroup(r->group);
   3461   } else {
   3462     merge((upb_refcounted*)r, from);
   3463   }
   3464 }
   3465 
   3466 void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) {
   3467   assert(!from->is_frozen);  /* Non-const pointer implies this. */
   3468   untrack(r, from, true);
   3469   if (r->is_frozen) {
   3470     unref(r);
   3471   } else {
   3472     assert(merged(r, from));
   3473   }
   3474 }
   3475 
   3476 void upb_refcounted_donateref(
   3477     const upb_refcounted *r, const void *from, const void *to) {
   3478   assert(from != to);
   3479   if (to != NULL)
   3480     upb_refcounted_ref(r, to);
   3481   if (from != NULL)
   3482     upb_refcounted_unref(r, from);
   3483 }
   3484 
   3485 void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) {
   3486   checkref(r, owner, false);
   3487 }
   3488 
   3489 bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s,
   3490                            int maxdepth) {
   3491   int i;
   3492   for (i = 0; i < n; i++) {
   3493     assert(!roots[i]->is_frozen);
   3494   }
   3495   return freeze(roots, n, s, maxdepth);
   3496 }
   3497 
   3498 
   3499 #include <stdlib.h>
   3500 
   3501 /* Fallback implementation if the shim is not specialized by the JIT. */
   3502 #define SHIM_WRITER(type, ctype)                                              \
   3503   bool upb_shim_set ## type (void *c, const void *hd, ctype val) {            \
   3504     uint8_t *m = c;                                                           \
   3505     const upb_shim_data *d = hd;                                              \
   3506     if (d->hasbit > 0)                                                        \
   3507       *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8);                   \
   3508     *(ctype*)&m[d->offset] = val;                                             \
   3509     return true;                                                              \
   3510   }                                                                           \
   3511 
   3512 SHIM_WRITER(double, double)
   3513 SHIM_WRITER(float,  float)
   3514 SHIM_WRITER(int32,  int32_t)
   3515 SHIM_WRITER(int64,  int64_t)
   3516 SHIM_WRITER(uint32, uint32_t)
   3517 SHIM_WRITER(uint64, uint64_t)
   3518 SHIM_WRITER(bool,   bool)
   3519 #undef SHIM_WRITER
   3520 
   3521 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset,
   3522                   int32_t hasbit) {
   3523   upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
   3524   bool ok;
   3525 
   3526   upb_shim_data *d = malloc(sizeof(*d));
   3527   if (!d) return false;
   3528   d->offset = offset;
   3529   d->hasbit = hasbit;
   3530 
   3531   upb_handlerattr_sethandlerdata(&attr, d);
   3532   upb_handlerattr_setalwaysok(&attr, true);
   3533   upb_handlers_addcleanup(h, d, free);
   3534 
   3535 #define TYPE(u, l) \
   3536   case UPB_TYPE_##u: \
   3537     ok = upb_handlers_set##l(h, f, upb_shim_set##l, &attr); break;
   3538 
   3539   ok = false;
   3540 
   3541   switch (upb_fielddef_type(f)) {
   3542     TYPE(INT64,  int64);
   3543     TYPE(INT32,  int32);
   3544     TYPE(ENUM,   int32);
   3545     TYPE(UINT64, uint64);
   3546     TYPE(UINT32, uint32);
   3547     TYPE(DOUBLE, double);
   3548     TYPE(FLOAT,  float);
   3549     TYPE(BOOL,   bool);
   3550     default: assert(false); break;
   3551   }
   3552 #undef TYPE
   3553 
   3554   upb_handlerattr_uninit(&attr);
   3555   return ok;
   3556 }
   3557 
   3558 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s,
   3559                                       upb_fieldtype_t *type) {
   3560   upb_func *f = upb_handlers_gethandler(h, s);
   3561 
   3562   if ((upb_int64_handlerfunc*)f == upb_shim_setint64) {
   3563     *type = UPB_TYPE_INT64;
   3564   } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) {
   3565     *type = UPB_TYPE_INT32;
   3566   } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) {
   3567     *type = UPB_TYPE_UINT64;
   3568   } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) {
   3569     *type = UPB_TYPE_UINT32;
   3570   } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) {
   3571     *type = UPB_TYPE_DOUBLE;
   3572   } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) {
   3573     *type = UPB_TYPE_FLOAT;
   3574   } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) {
   3575     *type = UPB_TYPE_BOOL;
   3576   } else {
   3577     return NULL;
   3578   }
   3579 
   3580   return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s);
   3581 }
   3582 
   3583 
   3584 #include <stdlib.h>
   3585 #include <string.h>
   3586 
   3587 static void upb_symtab_free(upb_refcounted *r) {
   3588   upb_symtab *s = (upb_symtab*)r;
   3589   upb_strtable_iter i;
   3590   upb_strtable_begin(&i, &s->symtab);
   3591   for (; !upb_strtable_done(&i); upb_strtable_next(&i)) {
   3592     const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i));
   3593     upb_def_unref(def, s);
   3594   }
   3595   upb_strtable_uninit(&s->symtab);
   3596   free(s);
   3597 }
   3598 
   3599 
   3600 upb_symtab *upb_symtab_new(const void *owner) {
   3601   static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free};
   3602   upb_symtab *s = malloc(sizeof(*s));
   3603   upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner);
   3604   upb_strtable_init(&s->symtab, UPB_CTYPE_PTR);
   3605   return s;
   3606 }
   3607 
   3608 void upb_symtab_freeze(upb_symtab *s) {
   3609   upb_refcounted *r;
   3610   bool ok;
   3611 
   3612   assert(!upb_symtab_isfrozen(s));
   3613   r = upb_symtab_upcast_mutable(s);
   3614   /* The symtab does not take ref2's (see refcounted.h) on the defs, because
   3615    * defs cannot refer back to the table and therefore cannot create cycles.  So
   3616    * 0 will suffice for maxdepth here. */
   3617   ok = upb_refcounted_freeze(&r, 1, NULL, 0);
   3618   UPB_ASSERT_VAR(ok, ok);
   3619 }
   3620 
   3621 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) {
   3622   upb_value v;
   3623   upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ?
   3624       upb_value_getptr(v) : NULL;
   3625   return ret;
   3626 }
   3627 
   3628 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
   3629   upb_value v;
   3630   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
   3631       upb_value_getptr(v) : NULL;
   3632   return def ? upb_dyncast_msgdef(def) : NULL;
   3633 }
   3634 
   3635 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
   3636   upb_value v;
   3637   upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ?
   3638       upb_value_getptr(v) : NULL;
   3639   return def ? upb_dyncast_enumdef(def) : NULL;
   3640 }
   3641 
   3642 /* Given a symbol and the base symbol inside which it is defined, find the
   3643  * symbol's definition in t. */
   3644 static upb_def *upb_resolvename(const upb_strtable *t,
   3645                                 const char *base, const char *sym) {
   3646   if(strlen(sym) == 0) return NULL;
   3647   if(sym[0] == '.') {
   3648     /* Symbols starting with '.' are absolute, so we do a single lookup.
   3649      * Slice to omit the leading '.' */
   3650     upb_value v;
   3651     return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL;
   3652   } else {
   3653     /* Remove components from base until we find an entry or run out.
   3654      * TODO: This branch is totally broken, but currently not used. */
   3655     (void)base;
   3656     assert(false);
   3657     return NULL;
   3658   }
   3659 }
   3660 
   3661 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base,
   3662                                   const char *sym) {
   3663   upb_def *ret = upb_resolvename(&s->symtab, base, sym);
   3664   return ret;
   3665 }
   3666 
   3667 /* Starts a depth-first traversal at "def", recursing into any subdefs
   3668  * (ie. submessage types).  Adds duplicates of existing defs to addtab
   3669  * wherever necessary, so that the resulting symtab will be consistent once
   3670  * addtab is added.
   3671  *
   3672  * More specifically, if any def D is found in the DFS that:
   3673  *
   3674  *   1. can reach a def that is being replaced by something in addtab, AND
   3675  *
   3676  *   2. is not itself being replaced already (ie. this name doesn't already
   3677  *      exist in addtab)
   3678  *
   3679  * ...then a duplicate (new copy) of D will be added to addtab.
   3680  *
   3681  * Returns true if this happened for any def reachable from "def."
   3682  *
   3683  * It is slightly tricky to do this correctly in the presence of cycles.  If we
   3684  * detect that our DFS has hit a cycle, we might not yet know if any SCCs on
   3685  * our stack can reach a def in addtab or not.  Once we figure this out, that
   3686  * answer needs to apply to *all* defs in these SCCs, even if we visited them
   3687  * already.  So a straight up one-pass cycle-detecting DFS won't work.
   3688  *
   3689  * To work around this problem, we traverse each SCC (which we already
   3690  * computed, since these defs are frozen) as a single node.  We first compute
   3691  * whether the SCC as a whole can reach any def in addtab, then we dup (or not)
   3692  * the entire SCC.  This requires breaking the encapsulation of upb_refcounted,
   3693  * since that is where we get the data about what SCC we are in. */
   3694 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab,
   3695                             const void *new_owner, upb_inttable *seen,
   3696                             upb_status *s) {
   3697   upb_value v;
   3698   bool need_dup;
   3699   const upb_def *base;
   3700   const void* memoize_key;
   3701 
   3702   /* Memoize results of this function for efficiency (since we're traversing a
   3703    * DAG this is not needed to limit the depth of the search).
   3704    *
   3705    * We memoize by SCC instead of by individual def. */
   3706   memoize_key = def->base.group;
   3707 
   3708   if (upb_inttable_lookupptr(seen, memoize_key, &v))
   3709     return upb_value_getbool(v);
   3710 
   3711   /* Visit submessages for all messages in the SCC. */
   3712   need_dup = false;
   3713   base = def;
   3714   do {
   3715     upb_value v;
   3716     const upb_msgdef *m;
   3717 
   3718     assert(upb_def_isfrozen(def));
   3719     if (def->type == UPB_DEF_FIELD) continue;
   3720     if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) {
   3721       need_dup = true;
   3722     }
   3723 
   3724     /* For messages, continue the recursion by visiting all subdefs, but only
   3725      * ones in different SCCs. */
   3726     m = upb_dyncast_msgdef(def);
   3727     if (m) {
   3728       upb_msg_field_iter i;
   3729       for(upb_msg_field_begin(&i, m);
   3730           !upb_msg_field_done(&i);
   3731           upb_msg_field_next(&i)) {
   3732         upb_fielddef *f = upb_msg_iter_field(&i);
   3733         const upb_def *subdef;
   3734 
   3735         if (!upb_fielddef_hassubdef(f)) continue;
   3736         subdef = upb_fielddef_subdef(f);
   3737 
   3738         /* Skip subdefs in this SCC. */
   3739         if (def->base.group == subdef->base.group) continue;
   3740 
   3741         /* |= to avoid short-circuit; we need its side-effects. */
   3742         need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s);
   3743         if (!upb_ok(s)) return false;
   3744       }
   3745     }
   3746   } while ((def = (upb_def*)def->base.next) != base);
   3747 
   3748   if (need_dup) {
   3749     /* Dup all defs in this SCC that don't already have entries in addtab. */
   3750     def = base;
   3751     do {
   3752       const char *name;
   3753 
   3754       if (def->type == UPB_DEF_FIELD) continue;
   3755       name = upb_def_fullname(def);
   3756       if (!upb_strtable_lookup(addtab, name, NULL)) {
   3757         upb_def *newdef = upb_def_dup(def, new_owner);
   3758         if (!newdef) goto oom;
   3759         newdef->came_from_user = false;
   3760         if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef)))
   3761           goto oom;
   3762       }
   3763     } while ((def = (upb_def*)def->base.next) != base);
   3764   }
   3765 
   3766   upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup));
   3767   return need_dup;
   3768 
   3769 oom:
   3770   upb_status_seterrmsg(s, "out of memory");
   3771   return false;
   3772 }
   3773 
   3774 /* TODO(haberman): we need a lot more testing of error conditions.
   3775  * The came_from_user stuff in particular is not tested. */
   3776 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor,
   3777                     upb_status *status) {
   3778   int i;
   3779   upb_strtable_iter iter;
   3780   upb_def **add_defs = NULL;
   3781   upb_strtable addtab;
   3782   upb_inttable seen;
   3783 
   3784   assert(!upb_symtab_isfrozen(s));
   3785   if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) {
   3786     upb_status_seterrmsg(status, "out of memory");
   3787     return false;
   3788   }
   3789 
   3790   /* Add new defs to our "add" set. */
   3791   for (i = 0; i < n; i++) {
   3792     upb_def *def = defs[i];
   3793     const char *fullname;
   3794     upb_fielddef *f;
   3795 
   3796     if (upb_def_isfrozen(def)) {
   3797       upb_status_seterrmsg(status, "added defs must be mutable");
   3798       goto err;
   3799     }
   3800     assert(!upb_def_isfrozen(def));
   3801     fullname = upb_def_fullname(def);
   3802     if (!fullname) {
   3803       upb_status_seterrmsg(
   3804           status, "Anonymous defs cannot be added to a symtab");
   3805       goto err;
   3806     }
   3807 
   3808     f = upb_dyncast_fielddef_mutable(def);
   3809 
   3810     if (f) {
   3811       if (!upb_fielddef_containingtypename(f)) {
   3812         upb_status_seterrmsg(status,
   3813                              "Standalone fielddefs must have a containing type "
   3814                              "(extendee) name set");
   3815         goto err;
   3816       }
   3817     } else {
   3818       if (upb_strtable_lookup(&addtab, fullname, NULL)) {
   3819         upb_status_seterrf(status, "Conflicting defs named '%s'", fullname);
   3820         goto err;
   3821       }
   3822       /* We need this to back out properly, because if there is a failure we
   3823        * need to donate the ref back to the caller. */
   3824       def->came_from_user = true;
   3825       upb_def_donateref(def, ref_donor, s);
   3826       if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def)))
   3827         goto oom_err;
   3828     }
   3829   }
   3830 
   3831   /* Add standalone fielddefs (ie. extensions) to the appropriate messages.
   3832    * If the appropriate message only exists in the existing symtab, duplicate
   3833    * it so we have a mutable copy we can add the fields to. */
   3834   for (i = 0; i < n; i++) {
   3835     upb_def *def = defs[i];
   3836     upb_fielddef *f = upb_dyncast_fielddef_mutable(def);
   3837     const char *msgname;
   3838     upb_value v;
   3839     upb_msgdef *m;
   3840 
   3841     if (!f) continue;
   3842     msgname = upb_fielddef_containingtypename(f);
   3843     /* We validated this earlier in this function. */
   3844     assert(msgname);
   3845 
   3846     /* If the extendee name is absolutely qualified, move past the initial ".".
   3847      * TODO(haberman): it is not obvious what it would mean if this was not
   3848      * absolutely qualified. */
   3849     if (msgname[0] == '.') {
   3850       msgname++;
   3851     }
   3852 
   3853     if (upb_strtable_lookup(&addtab, msgname, &v)) {
   3854       /* Extendee is in the set of defs the user asked us to add. */
   3855       m = upb_value_getptr(v);
   3856     } else {
   3857       /* Need to find and dup the extendee from the existing symtab. */
   3858       const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname);
   3859       if (!frozen_m) {
   3860         upb_status_seterrf(status,
   3861                            "Tried to extend message %s that does not exist "
   3862                            "in this SymbolTable.",
   3863                            msgname);
   3864         goto err;
   3865       }
   3866       m = upb_msgdef_dup(frozen_m, s);
   3867       if (!m) goto oom_err;
   3868       if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) {
   3869         upb_msgdef_unref(m, s);
   3870         goto oom_err;
   3871       }
   3872     }
   3873 
   3874     if (!upb_msgdef_addfield(m, f, ref_donor, status)) {
   3875       goto err;
   3876     }
   3877   }
   3878 
   3879   /* Add dups of any existing def that can reach a def with the same name as
   3880    * anything in our "add" set. */
   3881   if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err;
   3882   upb_strtable_begin(&iter, &s->symtab);
   3883   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
   3884     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
   3885     upb_resolve_dfs(def, &addtab, s, &seen, status);
   3886     if (!upb_ok(status)) goto err;
   3887   }
   3888   upb_inttable_uninit(&seen);
   3889 
   3890   /* Now using the table, resolve symbolic references for subdefs. */
   3891   upb_strtable_begin(&iter, &addtab);
   3892   for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
   3893     const char *base;
   3894     upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
   3895     upb_msgdef *m = upb_dyncast_msgdef_mutable(def);
   3896     upb_msg_field_iter j;
   3897 
   3898     if (!m) continue;
   3899     /* Type names are resolved relative to the message in which they appear. */
   3900     base = upb_msgdef_fullname(m);
   3901 
   3902     for(upb_msg_field_begin(&j, m);
   3903         !upb_msg_field_done(&j);
   3904         upb_msg_field_next(&j)) {
   3905       upb_fielddef *f = upb_msg_iter_field(&j);
   3906       const char *name = upb_fielddef_subdefname(f);
   3907       if (name && !upb_fielddef_subdef(f)) {
   3908         /* Try the lookup in the current set of to-be-added defs first. If not
   3909          * there, try existing defs. */
   3910         upb_def *subdef = upb_resolvename(&addtab, base, name);
   3911         if (subdef == NULL) {
   3912           subdef = upb_resolvename(&s->symtab, base, name);
   3913         }
   3914         if (subdef == NULL) {
   3915           upb_status_seterrf(
   3916               status, "couldn't resolve name '%s' in message '%s'", name, base);
   3917           goto err;
   3918         } else if (!upb_fielddef_setsubdef(f, subdef, status)) {
   3919           goto err;
   3920         }
   3921       }
   3922     }
   3923   }
   3924 
   3925   /* We need an array of the defs in addtab, for passing to upb_def_freeze. */
   3926   add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab));
   3927   if (add_defs == NULL) goto oom_err;
   3928   upb_strtable_begin(&iter, &addtab);
   3929   for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
   3930     add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter));
   3931   }
   3932 
   3933   if (!upb_def_freeze(add_defs, n, status)) goto err;
   3934 
   3935   /* This must be delayed until all errors have been detected, since error
   3936    * recovery code uses this table to cleanup defs. */
   3937   upb_strtable_uninit(&addtab);
   3938 
   3939   /* TODO(haberman) we don't properly handle errors after this point (like
   3940    * OOM in upb_strtable_insert() below). */
   3941   for (i = 0; i < n; i++) {
   3942     upb_def *def = add_defs[i];
   3943     const char *name = upb_def_fullname(def);
   3944     upb_value v;
   3945     bool success;
   3946 
   3947     if (upb_strtable_remove(&s->symtab, name, &v)) {
   3948       const upb_def *def = upb_value_getptr(v);
   3949       upb_def_unref(def, s);
   3950     }
   3951     success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def));
   3952     UPB_ASSERT_VAR(success, success == true);
   3953   }
   3954   free(add_defs);
   3955   return true;
   3956 
   3957 oom_err:
   3958   upb_status_seterrmsg(status, "out of memory");
   3959 err: {
   3960     /* For defs the user passed in, we need to donate the refs back.  For defs
   3961      * we dup'd, we need to just unref them. */
   3962     upb_strtable_begin(&iter, &addtab);
   3963     for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
   3964       upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter));
   3965       bool came_from_user = def->came_from_user;
   3966       def->came_from_user = false;
   3967       if (came_from_user) {
   3968         upb_def_donateref(def, s, ref_donor);
   3969       } else {
   3970         upb_def_unref(def, s);
   3971       }
   3972     }
   3973   }
   3974   upb_strtable_uninit(&addtab);
   3975   free(add_defs);
   3976   assert(!upb_ok(status));
   3977   return false;
   3978 }
   3979 
   3980 /* Iteration. */
   3981 
   3982 static void advance_to_matching(upb_symtab_iter *iter) {
   3983   if (iter->type == UPB_DEF_ANY)
   3984     return;
   3985 
   3986   while (!upb_strtable_done(&iter->iter) &&
   3987          iter->type != upb_symtab_iter_def(iter)->type) {
   3988     upb_strtable_next(&iter->iter);
   3989   }
   3990 }
   3991 
   3992 void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s,
   3993                       upb_deftype_t type) {
   3994   upb_strtable_begin(&iter->iter, &s->symtab);
   3995   iter->type = type;
   3996   advance_to_matching(iter);
   3997 }
   3998 
   3999 void upb_symtab_next(upb_symtab_iter *iter) {
   4000   upb_strtable_next(&iter->iter);
   4001   advance_to_matching(iter);
   4002 }
   4003 
   4004 bool upb_symtab_done(const upb_symtab_iter *iter) {
   4005   return upb_strtable_done(&iter->iter);
   4006 }
   4007 
   4008 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) {
   4009   return upb_value_getptr(upb_strtable_iter_value(&iter->iter));
   4010 }
   4011 /*
   4012 ** upb_table Implementation
   4013 **
   4014 ** Implementation is heavily inspired by Lua's ltable.c.
   4015 */
   4016 
   4017 
   4018 #include <stdlib.h>
   4019 #include <string.h>
   4020 
   4021 #define UPB_MAXARRSIZE 16  /* 64k. */
   4022 
   4023 /* From Chromium. */
   4024 #define ARRAY_SIZE(x) \
   4025     ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))
   4026 
   4027 static const double MAX_LOAD = 0.85;
   4028 
   4029 /* The minimum utilization of the array part of a mixed hash/array table.  This
   4030  * is a speed/memory-usage tradeoff (though it's not straightforward because of
   4031  * cache effects).  The lower this is, the more memory we'll use. */
   4032 static const double MIN_DENSITY = 0.1;
   4033 
   4034 bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; }
   4035 
   4036 int log2ceil(uint64_t v) {
   4037   int ret = 0;
   4038   bool pow2 = is_pow2(v);
   4039   while (v >>= 1) ret++;
   4040   ret = pow2 ? ret : ret + 1;  /* Ceiling. */
   4041   return UPB_MIN(UPB_MAXARRSIZE, ret);
   4042 }
   4043 
   4044 char *upb_strdup(const char *s) {
   4045   return upb_strdup2(s, strlen(s));
   4046 }
   4047 
   4048 char *upb_strdup2(const char *s, size_t len) {
   4049   size_t n;
   4050   char *p;
   4051 
   4052   /* Prevent overflow errors. */
   4053   if (len == SIZE_MAX) return NULL;
   4054   /* Always null-terminate, even if binary data; but don't rely on the input to
   4055    * have a null-terminating byte since it may be a raw binary buffer. */
   4056   n = len + 1;
   4057   p = malloc(n);
   4058   if (p) {
   4059     memcpy(p, s, len);
   4060     p[len] = 0;
   4061   }
   4062   return p;
   4063 }
   4064 
/* A type to represent the lookup key of either a strtable or an inttable.
 * Only one member is meaningful at a time: intkey() fills |num|, strkey2()
 * fills |str|. */
typedef union {
  uintptr_t num;      /* Integer key (inttable). */
  struct {
    const char *str;  /* Key bytes; not owned by this struct. */
    size_t len;       /* Number of bytes in |str|. */
  } str;
} lookupkey_t;
   4073 
   4074 static lookupkey_t strkey2(const char *str, size_t len) {
   4075   lookupkey_t k;
   4076   k.str.str = str;
   4077   k.str.len = len;
   4078   return k;
   4079 }
   4080 
   4081 static lookupkey_t intkey(uintptr_t key) {
   4082   lookupkey_t k;
   4083   k.num = key;
   4084   return k;
   4085 }
   4086 
/* Computes the hash of a key as it is stored in a table entry. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Compares a stored key (k1) against a caller-supplied lookup key (k2). */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
   4089 
   4090 /* Base table (shared code) ***************************************************/
   4091 
   4092 /* For when we need to cast away const. */
   4093 static upb_tabent *mutable_entries(upb_table *t) {
   4094   return (upb_tabent*)t->entries;
   4095 }
   4096 
   4097 static bool isfull(upb_table *t) {
   4098   return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD;
   4099 }
   4100 
   4101 static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) {
   4102   size_t bytes;
   4103 
   4104   t->count = 0;
   4105   t->ctype = ctype;
   4106   t->size_lg2 = size_lg2;
   4107   t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
   4108   bytes = upb_table_size(t) * sizeof(upb_tabent);
   4109   if (bytes > 0) {
   4110     t->entries = malloc(bytes);
   4111     if (!t->entries) return false;
   4112     memset(mutable_entries(t), 0, bytes);
   4113   } else {
   4114     t->entries = NULL;
   4115   }
   4116   return true;
   4117 }
   4118 
   4119 static void uninit(upb_table *t) { free(mutable_entries(t)); }
   4120 
   4121 static upb_tabent *emptyent(upb_table *t) {
   4122   upb_tabent *e = mutable_entries(t) + upb_table_size(t);
   4123   while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); }
   4124 }
   4125 
   4126 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
   4127   return (upb_tabent*)upb_getentry(t, hash);
   4128 }
   4129 
   4130 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
   4131                                    uint32_t hash, eqlfunc_t *eql) {
   4132   const upb_tabent *e;
   4133 
   4134   if (t->size_lg2 == 0) return NULL;
   4135   e = upb_getentry(t, hash);
   4136   if (upb_tabent_isempty(e)) return NULL;
   4137   while (1) {
   4138     if (eql(e->key, key)) return e;
   4139     if ((e = e->next) == NULL) return NULL;
   4140   }
   4141 }
   4142 
   4143 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
   4144                                      uint32_t hash, eqlfunc_t *eql) {
   4145   return (upb_tabent*)findentry(t, key, hash, eql);
   4146 }
   4147 
   4148 static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
   4149                    uint32_t hash, eqlfunc_t *eql) {
   4150   const upb_tabent *e = findentry(t, key, hash, eql);
   4151   if (e) {
   4152     if (v) {
   4153       _upb_value_setval(v, e->val.val, t->ctype);
   4154     }
   4155     return true;
   4156   } else {
   4157     return false;
   4158   }
   4159 }
   4160 
   4161 /* The given key must not already exist in the table. */
   4162 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
   4163                    upb_value val, uint32_t hash,
   4164                    hashfunc_t *hashfunc, eqlfunc_t *eql) {
   4165   upb_tabent *mainpos_e;
   4166   upb_tabent *our_e;
   4167 
   4168   UPB_UNUSED(eql);
   4169   UPB_UNUSED(key);
   4170   assert(findentry(t, key, hash, eql) == NULL);
   4171   assert(val.ctype == t->ctype);
   4172 
   4173   t->count++;
   4174   mainpos_e = getentry_mutable(t, hash);
   4175   our_e = mainpos_e;
   4176 
   4177   if (upb_tabent_isempty(mainpos_e)) {
   4178     /* Our main position is empty; use it. */
   4179     our_e->next = NULL;
   4180   } else {
   4181     /* Collision. */
   4182     upb_tabent *new_e = emptyent(t);
   4183     /* Head of collider's chain. */
   4184     upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
   4185     if (chain == mainpos_e) {
   4186       /* Existing ent is in its main posisiton (it has the same hash as us, and
   4187        * is the head of our chain).  Insert to new ent and append to this chain. */
   4188       new_e->next = mainpos_e->next;
   4189       mainpos_e->next = new_e;
   4190       our_e = new_e;
   4191     } else {
   4192       /* Existing ent is not in its main position (it is a node in some other
   4193        * chain).  This implies that no existing ent in the table has our hash.
   4194        * Evict it (updating its chain) and use its ent for head of our chain. */
   4195       *new_e = *mainpos_e;  /* copies next. */
   4196       while (chain->next != mainpos_e) {
   4197         chain = (upb_tabent*)chain->next;
   4198         assert(chain);
   4199       }
   4200       chain->next = new_e;
   4201       our_e = mainpos_e;
   4202       our_e->next = NULL;
   4203     }
   4204   }
   4205   our_e->key = tabkey;
   4206   our_e->val.val = val.val;
   4207   assert(findentry(t, key, hash, eql) == our_e);
   4208 }
   4209 
   4210 static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
   4211                upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
   4212   upb_tabent *chain = getentry_mutable(t, hash);
   4213   if (upb_tabent_isempty(chain)) return false;
   4214   if (eql(chain->key, key)) {
   4215     /* Element to remove is at the head of its chain. */
   4216     t->count--;
   4217     if (val) {
   4218       _upb_value_setval(val, chain->val.val, t->ctype);
   4219     }
   4220     if (chain->next) {
   4221       upb_tabent *move = (upb_tabent*)chain->next;
   4222       *chain = *move;
   4223       if (removed) *removed = move->key;
   4224       move->key = 0;  /* Make the slot empty. */
   4225     } else {
   4226       if (removed) *removed = chain->key;
   4227       chain->key = 0;  /* Make the slot empty. */
   4228     }
   4229     return true;
   4230   } else {
   4231     /* Element to remove is either in a non-head position or not in the
   4232      * table. */
   4233     while (chain->next && !eql(chain->next->key, key))
   4234       chain = (upb_tabent*)chain->next;
   4235     if (chain->next) {
   4236       /* Found element to remove. */
   4237       upb_tabent *rm;
   4238 
   4239       if (val) {
   4240         _upb_value_setval(val, chain->next->val.val, t->ctype);
   4241       }
   4242       rm = (upb_tabent*)chain->next;
   4243       if (removed) *removed = rm->key;
   4244       rm->key = 0;
   4245       chain->next = rm->next;
   4246       t->count--;
   4247       return true;
   4248     } else {
   4249       return false;
   4250     }
   4251   }
   4252 }
   4253 
   4254 static size_t next(const upb_table *t, size_t i) {
   4255   do {
   4256     if (++i >= upb_table_size(t))
   4257       return SIZE_MAX;
   4258   } while(upb_tabent_isempty(&t->entries[i]));
   4259 
   4260   return i;
   4261 }
   4262 
   4263 static size_t begin(const upb_table *t) {
   4264   return next(t, -1);
   4265 }
   4266 
   4267 
   4268 /* upb_strtable ***************************************************************/
   4269 
   4270 /* A simple "subclass" of upb_table that only adds a hash function for strings. */
   4271 
   4272 static upb_tabkey strcopy(lookupkey_t k2) {
   4273   char *str = malloc(k2.str.len + sizeof(uint32_t) + 1);
   4274   if (str == NULL) return 0;
   4275   memcpy(str, &k2.str.len, sizeof(uint32_t));
   4276   memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1);
   4277   return (uintptr_t)str;
   4278 }
   4279 
   4280 static uint32_t strhash(upb_tabkey key) {
   4281   uint32_t len;
   4282   char *str = upb_tabstr(key, &len);
   4283   return MurmurHash2(str, len, 0);
   4284 }
   4285 
   4286 static bool streql(upb_tabkey k1, lookupkey_t k2) {
   4287   uint32_t len;
   4288   char *str = upb_tabstr(k1, &len);
   4289   return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
   4290 }
   4291 
   4292 bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) {
   4293   return init(&t->t, ctype, 2);
   4294 }
   4295 
   4296 void upb_strtable_uninit(upb_strtable *t) {
   4297   size_t i;
   4298   for (i = 0; i < upb_table_size(&t->t); i++)
   4299     free((void*)t->t.entries[i].key);
   4300   uninit(&t->t);
   4301 }
   4302 
   4303 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) {
   4304   upb_strtable new_table;
   4305   upb_strtable_iter i;
   4306 
   4307   if (!init(&new_table.t, t->t.ctype, size_lg2))
   4308     return false;
   4309   upb_strtable_begin(&i, t);
   4310   for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
   4311     upb_strtable_insert2(
   4312         &new_table,
   4313         upb_strtable_iter_key(&i),
   4314         upb_strtable_iter_keylength(&i),
   4315         upb_strtable_iter_value(&i));
   4316   }
   4317   upb_strtable_uninit(t);
   4318   *t = new_table;
   4319   return true;
   4320 }
   4321 
   4322 bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len,
   4323                           upb_value v) {
   4324   lookupkey_t key;
   4325   upb_tabkey tabkey;
   4326   uint32_t hash;
   4327 
   4328   if (isfull(&t->t)) {
   4329     /* Need to resize.  New table of double the size, add old elements to it. */
   4330     if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) {
   4331       return false;
   4332     }
   4333   }
   4334 
   4335   key = strkey2(k, len);
   4336   tabkey = strcopy(key);
   4337   if (tabkey == 0) return false;
   4338 
   4339   hash = MurmurHash2(key.str.str, key.str.len, 0);
   4340   insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
   4341   return true;
   4342 }
   4343 
   4344 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
   4345                           upb_value *v) {
   4346   uint32_t hash = MurmurHash2(key, len, 0);
   4347   return lookup(&t->t, strkey2(key, len), v, hash, &streql);
   4348 }
   4349 
   4350 bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len,
   4351                          upb_value *val) {
   4352   uint32_t hash = MurmurHash2(key, strlen(key), 0);
   4353   upb_tabkey tabkey;
   4354   if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
   4355     free((void*)tabkey);
   4356     return true;
   4357   } else {
   4358     return false;
   4359   }
   4360 }
   4361 
   4362 /* Iteration */
   4363 
   4364 static const upb_tabent *str_tabent(const upb_strtable_iter *i) {
   4365   return &i->t->t.entries[i->index];
   4366 }
   4367 
   4368 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
   4369   i->t = t;
   4370   i->index = begin(&t->t);
   4371 }
   4372 
   4373 void upb_strtable_next(upb_strtable_iter *i) {
   4374   i->index = next(&i->t->t, i->index);
   4375 }
   4376 
   4377 bool upb_strtable_done(const upb_strtable_iter *i) {
   4378   return i->index >= upb_table_size(&i->t->t) ||
   4379          upb_tabent_isempty(str_tabent(i));
   4380 }
   4381 
   4382 const char *upb_strtable_iter_key(upb_strtable_iter *i) {
   4383   assert(!upb_strtable_done(i));
   4384   return upb_tabstr(str_tabent(i)->key, NULL);
   4385 }
   4386 
   4387 size_t upb_strtable_iter_keylength(upb_strtable_iter *i) {
   4388   uint32_t len;
   4389   assert(!upb_strtable_done(i));
   4390   upb_tabstr(str_tabent(i)->key, &len);
   4391   return len;
   4392 }
   4393 
   4394 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
   4395   assert(!upb_strtable_done(i));
   4396   return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype);
   4397 }
   4398 
   4399 void upb_strtable_iter_setdone(upb_strtable_iter *i) {
   4400   i->index = SIZE_MAX;
   4401 }
   4402 
   4403 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
   4404                                const upb_strtable_iter *i2) {
   4405   if (upb_strtable_done(i1) && upb_strtable_done(i2))
   4406     return true;
   4407   return i1->t == i2->t && i1->index == i2->index;
   4408 }
   4409 
   4410 
   4411 /* upb_inttable ***************************************************************/
   4412 
   4413 /* For inttables we use a hybrid structure where small keys are kept in an
   4414  * array and large keys are put in the hash table. */
   4415 
   4416 static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
   4417 
   4418 static bool inteql(upb_tabkey k1, lookupkey_t k2) {
   4419   return k1 == k2.num;
   4420 }
   4421 
   4422 static upb_tabval *mutable_array(upb_inttable *t) {
   4423   return (upb_tabval*)t->array;
   4424 }
   4425 
   4426 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
   4427   if (key < t->array_size) {
   4428     return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
   4429   } else {
   4430     upb_tabent *e =
   4431         findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
   4432     return e ? &e->val : NULL;
   4433   }
   4434 }
   4435 
   4436 static const upb_tabval *inttable_val_const(const upb_inttable *t,
   4437                                             uintptr_t key) {
   4438   return inttable_val((upb_inttable*)t, key);
   4439 }
   4440 
   4441 size_t upb_inttable_count(const upb_inttable *t) {
   4442   return t->t.count + t->array_count;
   4443 }
   4444 
   4445 static void check(upb_inttable *t) {
   4446   UPB_UNUSED(t);
   4447 #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
   4448   {
   4449     /* This check is very expensive (makes inserts/deletes O(N)). */
   4450     size_t count = 0;
   4451     upb_inttable_iter i;
   4452     upb_inttable_begin(&i, t);
   4453     for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
   4454       assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
   4455     }
   4456     assert(count == upb_inttable_count(t));
   4457   }
   4458 #endif
   4459 }
   4460 
   4461 bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype,
   4462                             size_t asize, int hsize_lg2) {
   4463   size_t array_bytes;
   4464 
   4465   if (!init(&t->t, ctype, hsize_lg2)) return false;
   4466   /* Always make the array part at least 1 long, so that we know key 0
   4467    * won't be in the hash part, which simplifies things. */
   4468   t->array_size = UPB_MAX(1, asize);
   4469   t->array_count = 0;
   4470   array_bytes = t->array_size * sizeof(upb_value);
   4471   t->array = malloc(array_bytes);
   4472   if (!t->array) {
   4473     uninit(&t->t);
   4474     return false;
   4475   }
   4476   memset(mutable_array(t), 0xff, array_bytes);
   4477   check(t);
   4478   return true;
   4479 }
   4480 
   4481 bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) {
   4482   return upb_inttable_sizedinit(t, ctype, 0, 4);
   4483 }
   4484 
   4485 void upb_inttable_uninit(upb_inttable *t) {
   4486   uninit(&t->t);
   4487   free(mutable_array(t));
   4488 }
   4489 
   4490 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) {
   4491   /* XXX: Table can't store value (uint64_t)-1.  Need to somehow statically
   4492    * guarantee that this is not necessary, or fix the limitation. */
   4493   upb_tabval tabval;
   4494   tabval.val = val.val;
   4495   UPB_UNUSED(tabval);
   4496   assert(upb_arrhas(tabval));
   4497 
   4498   if (key < t->array_size) {
   4499     assert(!upb_arrhas(t->array[key]));
   4500     t->array_count++;
   4501     mutable_array(t)[key].val = val.val;
   4502   } else {
   4503     if (isfull(&t->t)) {
   4504       /* Need to resize the hash part, but we re-use the array part. */
   4505       size_t i;
   4506       upb_table new_table;
   4507       if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1))
   4508         return false;
   4509       for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
   4510         const upb_tabent *e = &t->t.entries[i];
   4511         uint32_t hash;
   4512         upb_value v;
   4513 
   4514         _upb_value_setval(&v, e->val.val, t->t.ctype);
   4515         hash = upb_inthash(e->key);
   4516         insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
   4517       }
   4518 
   4519       assert(t->t.count == new_table.count);
   4520 
   4521       uninit(&t->t);
   4522       t->t = new_table;
   4523     }
   4524     insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
   4525   }
   4526   check(t);
   4527   return true;
   4528 }
   4529 
   4530 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
   4531   const upb_tabval *table_v = inttable_val_const(t, key);
   4532   if (!table_v) return false;
   4533   if (v) _upb_value_setval(v, table_v->val, t->t.ctype);
   4534   return true;
   4535 }
   4536 
   4537 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
   4538   upb_tabval *table_v = inttable_val(t, key);
   4539   if (!table_v) return false;
   4540   table_v->val = val.val;
   4541   return true;
   4542 }
   4543 
   4544 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
   4545   bool success;
   4546   if (key < t->array_size) {
   4547     if (upb_arrhas(t->array[key])) {
   4548       upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
   4549       t->array_count--;
   4550       if (val) {
   4551         _upb_value_setval(val, t->array[key].val, t->t.ctype);
   4552       }
   4553       mutable_array(t)[key] = empty;
   4554       success = true;
   4555     } else {
   4556       success = false;
   4557     }
   4558   } else {
   4559     upb_tabkey removed;
   4560     uint32_t hash = upb_inthash(key);
   4561     success = rm(&t->t, intkey(key), val, &removed, hash, &inteql);
   4562   }
   4563   check(t);
   4564   return success;
   4565 }
   4566 
   4567 bool upb_inttable_push(upb_inttable *t, upb_value val) {
   4568   return upb_inttable_insert(t, upb_inttable_count(t), val);
   4569 }
   4570 
   4571 upb_value upb_inttable_pop(upb_inttable *t) {
   4572   upb_value val;
   4573   bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
   4574   UPB_ASSERT_VAR(ok, ok);
   4575   return val;
   4576 }
   4577 
   4578 bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) {
   4579   return upb_inttable_insert(t, (uintptr_t)key, val);
   4580 }
   4581 
   4582 bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
   4583                             upb_value *v) {
   4584   return upb_inttable_lookup(t, (uintptr_t)key, v);
   4585 }
   4586 
   4587 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
   4588   return upb_inttable_remove(t, (uintptr_t)key, val);
   4589 }
   4590 
   4591 void upb_inttable_compact(upb_inttable *t) {
   4592   /* Create a power-of-two histogram of the table keys. */
   4593   int counts[UPB_MAXARRSIZE + 1] = {0};
   4594   uintptr_t max_key = 0;
   4595   upb_inttable_iter i;
   4596   size_t arr_size;
   4597   int arr_count;
   4598   upb_inttable new_t;
   4599 
   4600   upb_inttable_begin(&i, t);
   4601   for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   4602     uintptr_t key = upb_inttable_iter_key(&i);
   4603     if (key > max_key) {
   4604       max_key = key;
   4605     }
   4606     counts[log2ceil(key)]++;
   4607   }
   4608 
   4609   arr_size = 1;
   4610   arr_count = upb_inttable_count(t);
   4611 
   4612   if (upb_inttable_count(t) >= max_key * MIN_DENSITY) {
   4613     /* We can put 100% of the entries in the array part. */
   4614     arr_size = max_key + 1;
   4615   } else {
   4616     /* Find the largest power of two that satisfies the MIN_DENSITY
   4617      * definition. */
   4618     int size_lg2;
   4619     for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) {
   4620       arr_size = 1 << size_lg2;
   4621       arr_count -= counts[size_lg2];
   4622       if (arr_count >= arr_size * MIN_DENSITY) {
   4623         break;
   4624       }
   4625     }
   4626   }
   4627 
   4628   /* Array part must always be at least 1 entry large to catch lookups of key
   4629    * 0.  Key 0 must always be in the array part because "0" in the hash part
   4630    * denotes an empty entry. */
   4631   arr_size = UPB_MAX(arr_size, 1);
   4632 
   4633   {
   4634     /* Insert all elements into new, perfectly-sized table. */
   4635     int hash_count = upb_inttable_count(t) - arr_count;
   4636     int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
   4637     int hashsize_lg2 = log2ceil(hash_size);
   4638 
   4639     assert(hash_count >= 0);
   4640     upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2);
   4641     upb_inttable_begin(&i, t);
   4642     for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   4643       uintptr_t k = upb_inttable_iter_key(&i);
   4644       upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i));
   4645     }
   4646     assert(new_t.array_size == arr_size);
   4647     assert(new_t.t.size_lg2 == hashsize_lg2);
   4648   }
   4649   upb_inttable_uninit(t);
   4650   *t = new_t;
   4651 }
   4652 
   4653 /* Iteration. */
   4654 
   4655 static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
   4656   assert(!i->array_part);
   4657   return &i->t->t.entries[i->index];
   4658 }
   4659 
   4660 static upb_tabval int_arrent(const upb_inttable_iter *i) {
   4661   assert(i->array_part);
   4662   return i->t->array[i->index];
   4663 }
   4664 
   4665 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
   4666   i->t = t;
   4667   i->index = -1;
   4668   i->array_part = true;
   4669   upb_inttable_next(i);
   4670 }
   4671 
   4672 void upb_inttable_next(upb_inttable_iter *iter) {
   4673   const upb_inttable *t = iter->t;
   4674   if (iter->array_part) {
   4675     while (++iter->index < t->array_size) {
   4676       if (upb_arrhas(int_arrent(iter))) {
   4677         return;
   4678       }
   4679     }
   4680     iter->array_part = false;
   4681     iter->index = begin(&t->t);
   4682   } else {
   4683     iter->index = next(&t->t, iter->index);
   4684   }
   4685 }
   4686 
   4687 bool upb_inttable_done(const upb_inttable_iter *i) {
   4688   if (i->array_part) {
   4689     return i->index >= i->t->array_size ||
   4690            !upb_arrhas(int_arrent(i));
   4691   } else {
   4692     return i->index >= upb_table_size(&i->t->t) ||
   4693            upb_tabent_isempty(int_tabent(i));
   4694   }
   4695 }
   4696 
   4697 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
   4698   assert(!upb_inttable_done(i));
   4699   return i->array_part ? i->index : int_tabent(i)->key;
   4700 }
   4701 
   4702 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
   4703   assert(!upb_inttable_done(i));
   4704   return _upb_value_val(
   4705       i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val,
   4706       i->t->t.ctype);
   4707 }
   4708 
   4709 void upb_inttable_iter_setdone(upb_inttable_iter *i) {
   4710   i->index = SIZE_MAX;
   4711   i->array_part = false;
   4712 }
   4713 
   4714 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
   4715                                           const upb_inttable_iter *i2) {
   4716   if (upb_inttable_done(i1) && upb_inttable_done(i2))
   4717     return true;
   4718   return i1->t == i2->t && i1->index == i2->index &&
   4719          i1->array_part == i2->array_part;
   4720 }
   4721 
   4722 #ifdef UPB_UNALIGNED_READS_OK
   4723 /* -----------------------------------------------------------------------------
   4724  * MurmurHash2, by Austin Appleby (released as public domain).
   4725  * Reformatted and C99-ified by Joshua Haberman.
   4726  * Note - This code makes a few assumptions about how your machine behaves -
   4727  *   1. We can read a 4-byte value from any address without crashing
   4728  *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t
   4729  * And it has a few limitations -
   4730  *   1. It will not work incrementally.
   4731  *   2. It will not produce the same results on little-endian and big-endian
   4732  *      machines. */
   4733 uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) {
   4734   /* 'm' and 'r' are mixing constants generated offline.
   4735    * They're not really 'magic', they just happen to work well. */
   4736   const uint32_t m = 0x5bd1e995;
   4737   const int32_t r = 24;
   4738 
   4739   /* Initialize the hash to a 'random' value */
   4740   uint32_t h = seed ^ len;
   4741 
   4742   /* Mix 4 bytes at a time into the hash */
   4743   const uint8_t * data = (const uint8_t *)key;
   4744   while(len >= 4) {
   4745     uint32_t k = *(uint32_t *)data;
   4746 
   4747     k *= m;
   4748     k ^= k >> r;
   4749     k *= m;
   4750 
   4751     h *= m;
   4752     h ^= k;
   4753 
   4754     data += 4;
   4755     len -= 4;
   4756   }
   4757 
   4758   /* Handle the last few bytes of the input array */
   4759   switch(len) {
   4760     case 3: h ^= data[2] << 16;
   4761     case 2: h ^= data[1] << 8;
   4762     case 1: h ^= data[0]; h *= m;
   4763   };
   4764 
   4765   /* Do a few final mixes of the hash to ensure the last few
   4766    * bytes are well-incorporated. */
   4767   h ^= h >> 13;
   4768   h *= m;
   4769   h ^= h >> 15;
   4770 
   4771   return h;
   4772 }
   4773 
   4774 #else /* !UPB_UNALIGNED_READS_OK */
   4775 
   4776 /* -----------------------------------------------------------------------------
   4777  * MurmurHashAligned2, by Austin Appleby
   4778  * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
   4779  * on certain platforms.
   4780  * Performance will be lower than MurmurHash2 */
   4781 
   4782 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
   4783 
   4784 uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) {
   4785   const uint32_t m = 0x5bd1e995;
   4786   const int32_t r = 24;
   4787   const uint8_t * data = (const uint8_t *)key;
   4788   uint32_t h = seed ^ len;
   4789   uint8_t align = (uintptr_t)data & 3;
   4790 
   4791   if(align && (len >= 4)) {
   4792     /* Pre-load the temp registers */
   4793     uint32_t t = 0, d = 0;
   4794     int32_t sl;
   4795     int32_t sr;
   4796 
   4797     switch(align) {
   4798       case 1: t |= data[2] << 16;
   4799       case 2: t |= data[1] << 8;
   4800       case 3: t |= data[0];
   4801     }
   4802 
   4803     t <<= (8 * align);
   4804 
   4805     data += 4-align;
   4806     len -= 4-align;
   4807 
   4808     sl = 8 * (4-align);
   4809     sr = 8 * align;
   4810 
   4811     /* Mix */
   4812 
   4813     while(len >= 4) {
   4814       uint32_t k;
   4815 
   4816       d = *(uint32_t *)data;
   4817       t = (t >> sr) | (d << sl);
   4818 
   4819       k = t;
   4820 
   4821       MIX(h,k,m);
   4822 
   4823       t = d;
   4824 
   4825       data += 4;
   4826       len -= 4;
   4827     }
   4828 
   4829     /* Handle leftover data in temp registers */
   4830 
   4831     d = 0;
   4832 
   4833     if(len >= align) {
   4834       uint32_t k;
   4835 
   4836       switch(align) {
   4837         case 3: d |= data[2] << 16;
   4838         case 2: d |= data[1] << 8;
   4839         case 1: d |= data[0];
   4840       }
   4841 
   4842       k = (t >> sr) | (d << sl);
   4843       MIX(h,k,m);
   4844 
   4845       data += align;
   4846       len -= align;
   4847 
   4848       /* ----------
   4849        * Handle tail bytes */
   4850 
   4851       switch(len) {
   4852         case 3: h ^= data[2] << 16;
   4853         case 2: h ^= data[1] << 8;
   4854         case 1: h ^= data[0]; h *= m;
   4855       };
   4856     } else {
   4857       switch(len) {
   4858         case 3: d |= data[2] << 16;
   4859         case 2: d |= data[1] << 8;
   4860         case 1: d |= data[0];
   4861         case 0: h ^= (t >> sr) | (d << sl); h *= m;
   4862       }
   4863     }
   4864 
   4865     h ^= h >> 13;
   4866     h *= m;
   4867     h ^= h >> 15;
   4868 
   4869     return h;
   4870   } else {
   4871     while(len >= 4) {
   4872       uint32_t k = *(uint32_t *)data;
   4873 
   4874       MIX(h,k,m);
   4875 
   4876       data += 4;
   4877       len -= 4;
   4878     }
   4879 
   4880     /* ----------
   4881      * Handle tail bytes */
   4882 
   4883     switch(len) {
   4884       case 3: h ^= data[2] << 16;
   4885       case 2: h ^= data[1] << 8;
   4886       case 1: h ^= data[0]; h *= m;
   4887     };
   4888 
   4889     h ^= h >> 13;
   4890     h *= m;
   4891     h ^= h >> 15;
   4892 
   4893     return h;
   4894   }
   4895 }
   4896 #undef MIX
   4897 
   4898 #endif /* UPB_UNALIGNED_READS_OK */
   4899 
   4900 #include <errno.h>
   4901 #include <stdarg.h>
   4902 #include <stddef.h>
   4903 #include <stdint.h>
   4904 #include <stdio.h>
   4905 #include <stdlib.h>
   4906 #include <string.h>
   4907 
   4908 bool upb_dumptostderr(void *closure, const upb_status* status) {
   4909   UPB_UNUSED(closure);
   4910   fprintf(stderr, "%s\n", upb_status_errmsg(status));
   4911   return false;
   4912 }
   4913 
   4914 /* Guarantee null-termination and provide ellipsis truncation.
   4915  * It may be tempting to "optimize" this by initializing these final
   4916  * four bytes up-front and then being careful never to overwrite them,
   4917  * this is safer and simpler. */
   4918 static void nullz(upb_status *status) {
   4919   const char *ellipsis = "...";
   4920   size_t len = strlen(ellipsis);
   4921   assert(sizeof(status->msg) > len);
   4922   memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len);
   4923 }
   4924 
   4925 void upb_status_clear(upb_status *status) {
   4926   if (!status) return;
   4927   status->ok_ = true;
   4928   status->code_ = 0;
   4929   status->msg[0] = '\0';
   4930 }
   4931 
   4932 bool upb_ok(const upb_status *status) { return status->ok_; }
   4933 
   4934 upb_errorspace *upb_status_errspace(const upb_status *status) {
   4935   return status->error_space_;
   4936 }
   4937 
   4938 int upb_status_errcode(const upb_status *status) { return status->code_; }
   4939 
   4940 const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
   4941 
   4942 void upb_status_seterrmsg(upb_status *status, const char *msg) {
   4943   if (!status) return;
   4944   status->ok_ = false;
   4945   strncpy(status->msg, msg, sizeof(status->msg));
   4946   nullz(status);
   4947 }
   4948 
   4949 void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
   4950   va_list args;
   4951   va_start(args, fmt);
   4952   upb_status_vseterrf(status, fmt, args);
   4953   va_end(args);
   4954 }
   4955 
   4956 void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
   4957   if (!status) return;
   4958   status->ok_ = false;
   4959   _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
   4960   nullz(status);
   4961 }
   4962 
   4963 void upb_status_seterrcode(upb_status *status, upb_errorspace *space,
   4964                            int code) {
   4965   if (!status) return;
   4966   status->ok_ = false;
   4967   status->error_space_ = space;
   4968   status->code_ = code;
   4969   space->set_message(status, code);
   4970 }
   4971 
   4972 void upb_status_copy(upb_status *to, const upb_status *from) {
   4973   if (!to) return;
   4974   *to = *from;
   4975 }
   4976 /* This file was generated by upbc (the upb compiler).
   4977  * Do not edit -- your changes will be discarded when the file is
   4978  * regenerated. */
   4979 
   4980 
/* Forward declarations of the generated tables.  The initializers below refer
 * to one another by address (msgs -> arrays/strentries, fields -> msgs/enums,
 * enums -> arrays/strentries, strentries -> fields), so every table is
 * declared before any of them is defined. */
static const upb_msgdef msgs[20];
static const upb_fielddef fields[81];
static const upb_enumdef enums[4];
static const upb_tabent strentries[236];
static const upb_tabent intentries[14];
static const upb_tabval arrays[232];

/* reftables only exists when UPB_DEBUG_REFS is defined, yet the initializers
 * below name &reftables[...] unconditionally.
 * NOTE(review): presumably the *_INIT macros discard those arguments in
 * non-debug builds -- confirm against their definitions in upb.h. */
#ifdef UPB_DEBUG_REFS
static upb_inttable reftables[212];
#endif
   4991 
/* One statically initialized upb_msgdef per google.protobuf descriptor
 * message, ordered alphabetically by full name.  Other tables address these
 * entries by index (fields[] uses &msgs[N]), so the order must not change.
 *
 * Each entry carries two lookup tables: an int table backed by a slice of
 * arrays[] (plus intentries[] for some entries) and a string table backed by
 * a slice of strentries[], whose values point into fields[].  The final two
 * arguments are a pair of reftables[] slots.
 * NOTE(review): the meaning of the two integers after the name, and of the
 * numeric table parameters, is defined by the UPB_*_INIT macros, which are
 * not visible here. */
static const upb_msgdef msgs[20] = {
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]),
  UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]),
  UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]),
  UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]),
  UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]),
  UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]),
  UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]),
  UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]),
  UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]),
  UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]),
  UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]),
  UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]),
  UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]),
  UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]),
  UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]),
};
   5014 
/* One statically initialized upb_fielddef per field of the messages in
 * msgs[], ordered alphabetically by field name.  strentries[] addresses these
 * entries by index (&fields[N]), so the order must not change.
 *
 * Visible per entry: label, type, integer format, four boolean flags (only
 * the last is ever true here, on "path" and "span"), the field name, an
 * integer following the name, a pointer into msgs[], a sub-definition pointer
 * that is non-NULL exactly for MESSAGE fields (into msgs[]) and ENUM fields
 * (into enums[]), two further integers, a {0} initializer, and a pair of
 * reftables[] slots.
 * NOTE(review): the integer after the name is presumably the proto field
 * number and the &msgs[N] pointer the containing message; the remaining
 * integers and the flags are defined by UPB_FIELDDEF_INIT, which is not
 * visible here -- confirm before relying on this. */
static const upb_fielddef fields[81] = {
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, &msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], (const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], &reftables[193]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]),
  UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]),
  UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]),
};
   5098 
/* One statically initialized upb_enumdef per enum type referenced by the ENUM
 * fields in fields[] (which address these entries as &enums[N], so the order
 * must not change).
 *
 * Each entry carries a string table with UPB_CTYPE_INT32 values backed by a
 * slice of strentries[], an int table with UPB_CTYPE_CSTR values backed by a
 * slice of arrays[], an integer (0 for every entry here), and a pair of
 * reftables[] slots.
 * NOTE(review): presumably the string table maps value name -> number, the int
 * table maps number -> name, and the integer is the default value -- confirm
 * against UPB_ENUMDEF_INIT, which is not visible here. */
static const upb_enumdef enums[4] = {
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]),
  UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]),
  UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]),
};
   5105 
   5106 static const upb_tabent strentries[236] = {
   5107   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL},
   5108   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5109   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5110   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL},
   5111   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5112   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5113   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5114   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL},
   5115   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL},
   5116   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5117   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL},
   5118   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5119   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5120   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5121   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL},
   5122   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]},
   5123   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL},
   5124   {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL},
   5125   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5126   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5127   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5128   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL},
   5129   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL},
   5130   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]},
   5131   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
   5132   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5133   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL},
   5134   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5135   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL},
   5136   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5137   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL},
   5138   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]},
   5139   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
   5140   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5141   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5142   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5143   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5144   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL},
   5145   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5146   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL},
   5147   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5148   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5149   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5150   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5151   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]},
   5152   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5153   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5154   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL},
   5155   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL},
   5156   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]},
   5157   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL},
   5158   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL},
   5159   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]},
   5160   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5161   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL},
   5162   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5163   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5164   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5165   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5166   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL},
   5167   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL},
   5168   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5169   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL},
   5170   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5171   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5172   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL},
   5173   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5174   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
   5175   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL},
   5176   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL},
   5177   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5178   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL},
   5179   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL},
   5180   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5181   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL},
   5182   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5183   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5184   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5185   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL},
   5186   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL},
   5187   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL},
   5188   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]},
   5189   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL},
   5190   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]},
   5191   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5192   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL},
   5193   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5194   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5195   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
   5196   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5197   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL},
   5198   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5199   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL},
   5200   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5201   {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]},
   5202   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), UPB_TABVALUE_PTR_INIT(&fields[22]), NULL},
   5203   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5204   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5205   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5206   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL},
   5207   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL},
   5208   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL},
   5209   {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL},
   5210   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL},
   5211   {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]},
   5212   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5213   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
   5214   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL},
   5215   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5216   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5217   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5218   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL},
   5219   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL},
   5220   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5221   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL},
   5222   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL},
   5223   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
   5224   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5225   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5226   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5227   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5228   {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]},
   5229   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL},
   5230   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]},
   5231   {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
   5232   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5233   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5234   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5235   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5236   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5237   {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL},
   5238   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5239   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5240   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5241   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5242   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]},
   5243   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5244   {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL},
   5245   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]},
   5246   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL},
   5247   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL},
   5248   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5249   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5250   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL},
   5251   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5252   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5253   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5254   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL},
   5255   {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL},
   5256   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5257   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5258   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5259   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5260   {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL},
   5261   {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL},
   5262   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]},
   5263   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5264   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5265   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL},
   5266   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL},
   5267   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]},
   5268   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5269   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL},
   5270   {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL},
   5271   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL},
   5272   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5273   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5274   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5275   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5276   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL},
   5277   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]},
   5278   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL},
   5279   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5280   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL},
   5281   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL},
   5282   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL},
   5283   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5284   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]},
   5285   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5286   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5287   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]},
   5288   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5289   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5290   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5291   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5292   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL},
   5293   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL},
   5294   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5295   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]},
   5296   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5297   {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), UPB_TABVALUE_INT_INIT(16), NULL},
   5298   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL},
   5299   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL},
   5300   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL},
   5301   {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL},
   5302   {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL},
   5303   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5304   {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL},
   5305   {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]},
   5306   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL},
   5307   {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL},
   5308   {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]},
   5309   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5310   {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL},
   5311   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5312   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5313   {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL},
   5314   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL},
   5315   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL},
   5316   {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL},
   5317   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5318   {UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL},
   5319   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5320   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5321   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5322   {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]},
   5323   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5324   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL},
   5325   {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL},
   5326   {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL},
   5327   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5328   {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL},
   5329   {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL},
   5330   {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL},
   5331   {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]},
   5332   {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL},
   5333   {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL},
   5334   {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]},
   5335   {UPB_TABKEY_STR("\041", "\000", "\000", "\000", "google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]},
   5336   {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL},
   5337   {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL},
   5338   {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]},
   5339   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL},
   5340   {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL},
   5341   {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]},
   5342   {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL},
   5343 };
   5344 
   /* Statically initialized table entries with numeric keys.
    *
    * The 14 slots alternate between an empty slot and a populated one.  Every
    * populated slot uses the key 999 and stores a pointer into `fields`; none
    * of them has a chained next entry (the third member is always NULL).
    *
    * NOTE(review): fields[72], [74], [75] and [76] are the same elements that
    * strentries maps from the name "uninterpreted_option", so 999 is presumably
    * that field's number in the various *Options messages -- confirm against
    * descriptor.proto.  Slot order is significant; do not reorder by hand. */
   5345 static const upb_tabent intentries[14] = {
   5346   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5347   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL},
   5348   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5349   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL},
   5350   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5351   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL},
   5352   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5353   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL},
   5354   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5355   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL},
   5356   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5357   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL},
   5358   {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL},
   5359   {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL},
   5360 };
   5361 
   /* Flat pool of 232 statically initialized table values.
    *
    * Each element is either empty, a pointer to an element of `fields`, or
    * (from "LABEL_OPTIONAL" to the end) a pointer to a string literal holding
    * an enum value name.  Position within the pool is significant: in the
    * name runs at the end, a name's offset from the start of its run equals
    * the integer that strentries stores for the same name (for example
    * LABEL_OPTIONAL = 1, TYPE_DOUBLE = 1, STRING = 0, SPEED = 1), and a run
    * whose names start at 1 begins with an empty element.
    *
    * NOTE(review): presumably these are the densely indexed parts of
    * integer-keyed tables, referenced by offset from table initializers
    * elsewhere in the file -- confirm before editing.  Do not reorder, insert
    * or remove elements by hand. */
   5362 static const upb_tabval arrays[232] = {
   5363   UPB_TABVALUE_EMPTY_INIT,
   5364   UPB_TABVALUE_PTR_INIT(&fields[38]),
   5365   UPB_TABVALUE_PTR_INIT(&fields[16]),
   5366   UPB_TABVALUE_PTR_INIT(&fields[44]),
   5367   UPB_TABVALUE_PTR_INIT(&fields[9]),
   5368   UPB_TABVALUE_PTR_INIT(&fields[15]),
   5369   UPB_TABVALUE_PTR_INIT(&fields[14]),
   5370   UPB_TABVALUE_PTR_INIT(&fields[49]),
   5371   UPB_TABVALUE_EMPTY_INIT,
   5372   UPB_TABVALUE_PTR_INIT(&fields[66]),
   5373   UPB_TABVALUE_PTR_INIT(&fields[8]),
   5374   UPB_TABVALUE_EMPTY_INIT,
   5375   UPB_TABVALUE_PTR_INIT(&fields[40]),
   5376   UPB_TABVALUE_PTR_INIT(&fields[78]),
   5377   UPB_TABVALUE_PTR_INIT(&fields[50]),
   5378   UPB_TABVALUE_EMPTY_INIT,
   5379   UPB_TABVALUE_EMPTY_INIT,
   5380   UPB_TABVALUE_PTR_INIT(&fields[1]),
   5381   UPB_TABVALUE_EMPTY_INIT,
   5382   UPB_TABVALUE_EMPTY_INIT,
   5383   UPB_TABVALUE_EMPTY_INIT,
   5384   UPB_TABVALUE_EMPTY_INIT,
   5385   UPB_TABVALUE_EMPTY_INIT,
   5386   UPB_TABVALUE_EMPTY_INIT,
   5387   UPB_TABVALUE_PTR_INIT(&fields[37]),
   5388   UPB_TABVALUE_PTR_INIT(&fields[47]),
   5389   UPB_TABVALUE_PTR_INIT(&fields[52]),
   5390   UPB_TABVALUE_EMPTY_INIT,
   5391   UPB_TABVALUE_EMPTY_INIT,
   5392   UPB_TABVALUE_EMPTY_INIT,
   5393   UPB_TABVALUE_EMPTY_INIT,
   5394   UPB_TABVALUE_EMPTY_INIT,
   5395   UPB_TABVALUE_PTR_INIT(&fields[41]),
   5396   UPB_TABVALUE_PTR_INIT(&fields[12]),
   5397   UPB_TABVALUE_PTR_INIT(&fields[46]),
   5398   UPB_TABVALUE_PTR_INIT(&fields[27]),
   5399   UPB_TABVALUE_PTR_INIT(&fields[69]),
   5400   UPB_TABVALUE_PTR_INIT(&fields[70]),
   5401   UPB_TABVALUE_PTR_INIT(&fields[4]),
   5402   UPB_TABVALUE_PTR_INIT(&fields[51]),
   5403   UPB_TABVALUE_EMPTY_INIT,
   5404   UPB_TABVALUE_PTR_INIT(&fields[3]),
   5405   UPB_TABVALUE_PTR_INIT(&fields[58]),
   5406   UPB_TABVALUE_PTR_INIT(&fields[6]),
   5407   UPB_TABVALUE_EMPTY_INIT,
   5408   UPB_TABVALUE_PTR_INIT(&fields[28]),
   5409   UPB_TABVALUE_EMPTY_INIT,
   5410   UPB_TABVALUE_EMPTY_INIT,
   5411   UPB_TABVALUE_EMPTY_INIT,
   5412   UPB_TABVALUE_PTR_INIT(&fields[11]),
   5413   UPB_TABVALUE_PTR_INIT(&fields[79]),
   5414   UPB_TABVALUE_EMPTY_INIT,
   5415   UPB_TABVALUE_EMPTY_INIT,
   5416   UPB_TABVALUE_EMPTY_INIT,
   5417   UPB_TABVALUE_EMPTY_INIT,
   5418   UPB_TABVALUE_EMPTY_INIT,
   5419   UPB_TABVALUE_EMPTY_INIT,
   5420   UPB_TABVALUE_EMPTY_INIT,
   5421   UPB_TABVALUE_EMPTY_INIT,
   5422   UPB_TABVALUE_EMPTY_INIT,
   5423   UPB_TABVALUE_EMPTY_INIT,
   5424   UPB_TABVALUE_EMPTY_INIT,
   5425   UPB_TABVALUE_EMPTY_INIT,
   5426   UPB_TABVALUE_EMPTY_INIT,
   5427   UPB_TABVALUE_EMPTY_INIT,
   5428   UPB_TABVALUE_EMPTY_INIT,
   5429   UPB_TABVALUE_EMPTY_INIT,
   5430   UPB_TABVALUE_EMPTY_INIT,
   5431   UPB_TABVALUE_EMPTY_INIT,
   5432   UPB_TABVALUE_EMPTY_INIT,
   5433   UPB_TABVALUE_EMPTY_INIT,
   5434   UPB_TABVALUE_EMPTY_INIT,
   5435   UPB_TABVALUE_EMPTY_INIT,
   5436   UPB_TABVALUE_PTR_INIT(&fields[34]),
   5437   UPB_TABVALUE_PTR_INIT(&fields[57]),
   5438   UPB_TABVALUE_PTR_INIT(&fields[5]),
   5439   UPB_TABVALUE_PTR_INIT(&fields[32]),
   5440   UPB_TABVALUE_PTR_INIT(&fields[10]),
   5441   UPB_TABVALUE_PTR_INIT(&fields[63]),
   5442   UPB_TABVALUE_PTR_INIT(&fields[13]),
   5443   UPB_TABVALUE_PTR_INIT(&fields[53]),
   5444   UPB_TABVALUE_PTR_INIT(&fields[64]),
   5445   UPB_TABVALUE_PTR_INIT(&fields[61]),
   5446   UPB_TABVALUE_PTR_INIT(&fields[80]),
   5447   UPB_TABVALUE_EMPTY_INIT,
   5448   UPB_TABVALUE_PTR_INIT(&fields[17]),
   5449   UPB_TABVALUE_EMPTY_INIT,
   5450   UPB_TABVALUE_PTR_INIT(&fields[26]),
   5451   UPB_TABVALUE_EMPTY_INIT,
   5452   UPB_TABVALUE_EMPTY_INIT,
   5453   UPB_TABVALUE_EMPTY_INIT,
   5454   UPB_TABVALUE_EMPTY_INIT,
   5455   UPB_TABVALUE_EMPTY_INIT,
   5456   UPB_TABVALUE_EMPTY_INIT,
   5457   UPB_TABVALUE_PTR_INIT(&fields[25]),
   5458   UPB_TABVALUE_PTR_INIT(&fields[48]),
   5459   UPB_TABVALUE_PTR_INIT(&fields[24]),
   5460   UPB_TABVALUE_PTR_INIT(&fields[18]),
   5461   UPB_TABVALUE_EMPTY_INIT,
   5462   UPB_TABVALUE_EMPTY_INIT,
   5463   UPB_TABVALUE_EMPTY_INIT,
   5464   UPB_TABVALUE_EMPTY_INIT,
   5465   UPB_TABVALUE_PTR_INIT(&fields[2]),
   5466   UPB_TABVALUE_PTR_INIT(&fields[23]),
   5467   UPB_TABVALUE_PTR_INIT(&fields[62]),
   5468   UPB_TABVALUE_EMPTY_INIT,
   5469   UPB_TABVALUE_PTR_INIT(&fields[22]),
   5470   UPB_TABVALUE_EMPTY_INIT,
   5471   UPB_TABVALUE_EMPTY_INIT,
   5472   UPB_TABVALUE_EMPTY_INIT,
   5473   UPB_TABVALUE_EMPTY_INIT,
   5474   UPB_TABVALUE_EMPTY_INIT,
   5475   UPB_TABVALUE_EMPTY_INIT,
   5476   UPB_TABVALUE_EMPTY_INIT,
   5477   UPB_TABVALUE_EMPTY_INIT,
   5478   UPB_TABVALUE_EMPTY_INIT,
   5479   UPB_TABVALUE_EMPTY_INIT,
   5480   UPB_TABVALUE_EMPTY_INIT,
   5481   UPB_TABVALUE_EMPTY_INIT,
   5482   UPB_TABVALUE_EMPTY_INIT,
   5483   UPB_TABVALUE_EMPTY_INIT,
   5484   UPB_TABVALUE_EMPTY_INIT,
   5485   UPB_TABVALUE_EMPTY_INIT,
   5486   UPB_TABVALUE_EMPTY_INIT,
   5487   UPB_TABVALUE_EMPTY_INIT,
   5488   UPB_TABVALUE_EMPTY_INIT,
   5489   UPB_TABVALUE_EMPTY_INIT,
   5490   UPB_TABVALUE_EMPTY_INIT,
   5491   UPB_TABVALUE_EMPTY_INIT,
   5492   UPB_TABVALUE_EMPTY_INIT,
   5493   UPB_TABVALUE_EMPTY_INIT,
   5494   UPB_TABVALUE_EMPTY_INIT,
   5495   UPB_TABVALUE_EMPTY_INIT,
   5496   UPB_TABVALUE_EMPTY_INIT,
   5497   UPB_TABVALUE_EMPTY_INIT,
   5498   UPB_TABVALUE_EMPTY_INIT,
   5499   UPB_TABVALUE_EMPTY_INIT,
   5500   UPB_TABVALUE_EMPTY_INIT,
   5501   UPB_TABVALUE_EMPTY_INIT,
   5502   UPB_TABVALUE_EMPTY_INIT,
   5503   UPB_TABVALUE_EMPTY_INIT,
   5504   UPB_TABVALUE_EMPTY_INIT,
   5505   UPB_TABVALUE_EMPTY_INIT,
   5506   UPB_TABVALUE_EMPTY_INIT,
   5507   UPB_TABVALUE_EMPTY_INIT,
   5508   UPB_TABVALUE_EMPTY_INIT,
   5509   UPB_TABVALUE_EMPTY_INIT,
   5510   UPB_TABVALUE_EMPTY_INIT,
   5511   UPB_TABVALUE_EMPTY_INIT,
   5512   UPB_TABVALUE_EMPTY_INIT,
   5513   UPB_TABVALUE_EMPTY_INIT,
   5514   UPB_TABVALUE_PTR_INIT(&fields[31]),
   5515   UPB_TABVALUE_PTR_INIT(&fields[45]),
   5516   UPB_TABVALUE_EMPTY_INIT,
   5517   UPB_TABVALUE_EMPTY_INIT,
   5518   UPB_TABVALUE_EMPTY_INIT,
   5519   UPB_TABVALUE_EMPTY_INIT,
   5520   UPB_TABVALUE_EMPTY_INIT,
   5521   UPB_TABVALUE_EMPTY_INIT,
   5522   UPB_TABVALUE_EMPTY_INIT,
   5523   UPB_TABVALUE_EMPTY_INIT,
   5524   UPB_TABVALUE_EMPTY_INIT,
   5525   UPB_TABVALUE_EMPTY_INIT,
   5526   UPB_TABVALUE_EMPTY_INIT,
   5527   UPB_TABVALUE_EMPTY_INIT,
   5528   UPB_TABVALUE_EMPTY_INIT,
   5529   UPB_TABVALUE_EMPTY_INIT,
   5530   UPB_TABVALUE_PTR_INIT(&fields[39]),
   5531   UPB_TABVALUE_PTR_INIT(&fields[20]),
   5532   UPB_TABVALUE_PTR_INIT(&fields[56]),
   5533   UPB_TABVALUE_PTR_INIT(&fields[55]),
   5534   UPB_TABVALUE_EMPTY_INIT,
   5535   UPB_TABVALUE_EMPTY_INIT,
   5536   UPB_TABVALUE_EMPTY_INIT,
   5537   UPB_TABVALUE_EMPTY_INIT,
   5538   UPB_TABVALUE_EMPTY_INIT,
   5539   UPB_TABVALUE_PTR_INIT(&fields[35]),
   5540   UPB_TABVALUE_PTR_INIT(&fields[33]),
   5541   UPB_TABVALUE_PTR_INIT(&fields[54]),
   5542   UPB_TABVALUE_EMPTY_INIT,
   5543   UPB_TABVALUE_EMPTY_INIT,
   5544   UPB_TABVALUE_EMPTY_INIT,
   5545   UPB_TABVALUE_EMPTY_INIT,
   5546   UPB_TABVALUE_EMPTY_INIT,
   5547   UPB_TABVALUE_PTR_INIT(&fields[30]),
   5548   UPB_TABVALUE_EMPTY_INIT,
   5549   UPB_TABVALUE_PTR_INIT(&fields[59]),
   5550   UPB_TABVALUE_PTR_INIT(&fields[65]),
   5551   UPB_TABVALUE_PTR_INIT(&fields[29]),
   5552   UPB_TABVALUE_PTR_INIT(&fields[68]),
   5553   UPB_TABVALUE_EMPTY_INIT,
   5554   UPB_TABVALUE_EMPTY_INIT,
   5555   UPB_TABVALUE_PTR_INIT(&fields[36]),
   5556   UPB_TABVALUE_PTR_INIT(&fields[19]),
   5557   UPB_TABVALUE_PTR_INIT(&fields[60]),
   5558   UPB_TABVALUE_PTR_INIT(&fields[43]),
   5559   UPB_TABVALUE_PTR_INIT(&fields[7]),
   5560   UPB_TABVALUE_PTR_INIT(&fields[67]),
   5561   UPB_TABVALUE_PTR_INIT(&fields[0]),
   5562   UPB_TABVALUE_EMPTY_INIT,
   5563   UPB_TABVALUE_PTR_INIT(&fields[42]),
   5564   UPB_TABVALUE_PTR_INIT(&fields[21]),
   5565   UPB_TABVALUE_EMPTY_INIT,
   5566   UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"),
   5567   UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"),
   5568   UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"),
   5569   UPB_TABVALUE_EMPTY_INIT,
   5570   UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"),
   5571   UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"),
   5572   UPB_TABVALUE_PTR_INIT("TYPE_INT64"),
   5573   UPB_TABVALUE_PTR_INIT("TYPE_UINT64"),
   5574   UPB_TABVALUE_PTR_INIT("TYPE_INT32"),
   5575   UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"),
   5576   UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"),
   5577   UPB_TABVALUE_PTR_INIT("TYPE_BOOL"),
   5578   UPB_TABVALUE_PTR_INIT("TYPE_STRING"),
   5579   UPB_TABVALUE_PTR_INIT("TYPE_GROUP"),
   5580   UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"),
   5581   UPB_TABVALUE_PTR_INIT("TYPE_BYTES"),
   5582   UPB_TABVALUE_PTR_INIT("TYPE_UINT32"),
   5583   UPB_TABVALUE_PTR_INIT("TYPE_ENUM"),
   5584   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"),
   5585   UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"),
   5586   UPB_TABVALUE_PTR_INIT("TYPE_SINT32"),
   5587   UPB_TABVALUE_PTR_INIT("TYPE_SINT64"),
   5588   UPB_TABVALUE_PTR_INIT("STRING"),
   5589   UPB_TABVALUE_PTR_INIT("CORD"),
   5590   UPB_TABVALUE_PTR_INIT("STRING_PIECE"),
   5591   UPB_TABVALUE_EMPTY_INIT,
   5592   UPB_TABVALUE_PTR_INIT("SPEED"),
   5593   UPB_TABVALUE_PTR_INIT("CODE_SIZE"),
   5594   UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"),
   5595 };
   5596 
   /* The statically initialized symbol table that
    * upbdefs_google_protobuf_descriptor() hands out.  Its string table is
    * built over the entries starting at strentries[204]; the remaining two
    * arguments are the last two elements of reftables.
    *
    * NOTE(review): the 24 populated entries from strentries[204] onward are
    * the fully qualified "google.protobuf.*" message and enum names, so the
    * arguments 24, 31 and 5 look like entry count, slot mask and log2 size of
    * a 32-slot table -- confirm against the UPB_STRTABLE_INIT definition. */
   5597 static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]);
   5598 
   /* Returns the static symbol table defined above.
    *
    * Before returning, a reference is taken on the table on behalf of `owner`
    * by calling upb_symtab_ref().  The returned pointer always addresses the
    * same file-scope `symtab` object.
    *
    * NOTE(review): presumably the caller is expected to drop that reference
    * later with the matching unref call using the same `owner` -- confirm
    * against the upb refcounting API. */
   5599 const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) {
   5600   upb_symtab_ref(&symtab, owner);
   5601   return &symtab;
   5602 }
   5603 
   5604 #ifdef UPB_DEBUG_REFS
   5605 static upb_inttable reftables[212] = {
   5606   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5607   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5608   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5609   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5610   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5611   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5612   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5613   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5614   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5615   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5616   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5617   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5618   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5619   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5620   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5621   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5622   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5623   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5624   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5625   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5626   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5627   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5628   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5629   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5630   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5631   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5632   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5633   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5634   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5635   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5636   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5637   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5638   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5639   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5640   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5641   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5642   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5643   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5644   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5645   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5646   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5647   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5648   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5649   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5650   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5651   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5652   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5653   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5654   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5655   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5656   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5657   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5658   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5659   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5660   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5661   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5662   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5663   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5664   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5665   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5666   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5667   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5668   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5669   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5670   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5671   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5672   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5673   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5674   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5675   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5676   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5677   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5678   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5679   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5680   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5681   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5682   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5683   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5684   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5685   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5686   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5687   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5688   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5689   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5690   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5691   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5692   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5693   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5694   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5695   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5696   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5697   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5698   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5699   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5700   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5701   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5702   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5703   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5704   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5705   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5706   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5707   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5708   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5709   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5710   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5711   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5712   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5713   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5714   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5715   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5716   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5717   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5718   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5719   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5720   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5721   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5722   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5723   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5724   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5725   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5726   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5727   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5728   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5729   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5730   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5731   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5732   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5733   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5734   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5735   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5736   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5737   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5738   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5739   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5740   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5741   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5742   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5743   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5744   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5745   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5746   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5747   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5748   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5749   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5750   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5751   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5752   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5753   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5754   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5755   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5756   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5757   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5758   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5759   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5760   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5761   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5762   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5763   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5764   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5765   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5766   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5767   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5768   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5769   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5770   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5771   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5772   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5773   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5774   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5775   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5776   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5777   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5778   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5779   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5780   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5781   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5782   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5783   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5784   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5785   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5786   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5787   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5788   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5789   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5790   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5791   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5792   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5793   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5794   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5795   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5796   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5797   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5798   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5799   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5800   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5801   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5802   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5803   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5804   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5805   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5806   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5807   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5808   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5809   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5810   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5811   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5812   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5813   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5814   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5815   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5816   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5817   UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR),
   5818 };
   5819 #endif
   5820 
   5821 /*
   5822 ** XXX: The routines in this file that consume a string do not currently
   5823 ** support having the string span buffers.  In the future, as upb_sink and
   5824 ** its buffering/sharing functionality evolve there should be an easy and
   5825 ** idiomatic way of correctly handling this case.  For now, we accept this
   5826 ** limitation since we currently only parse descriptors from single strings.
   5827 */
   5828 
   5829 
   5830 #include <errno.h>
   5831 #include <stdlib.h>
   5832 #include <string.h>
   5833 
   5834 /* upb_deflist is an internal-only dynamic array for storing a growing list of
   5835  * upb_defs. */
   5836 typedef struct {
   5837   upb_def **defs;
   5838   size_t len;
   5839   size_t size;
   5840   bool owned;
   5841 } upb_deflist;
   5842 
/* We keep a stack of all the message scopes we are currently in, as well as
 * the top-level file scope.  This is necessary to correctly qualify the
 * definitions that are contained inside.  "name" tracks the name of the
 * message or package (a bare name -- not qualified by any enclosing scopes). */
typedef struct {
  /* Heap-allocated scope name, or NULL if none has been set yet.  The frame
   * owns this string; it is released with free(). */
  char *name;
  /* Index of the first def that is under this scope.  For msgdefs, the
   * msgdef itself is at start-1. */
  int start;
} upb_descreader_frame;
   5853 
   5854 /* The maximum number of nested declarations that are allowed, ie.
   5855  * message Foo {
   5856  *   message Bar {
   5857  *     message Baz {
   5858  *     }
   5859  *   }
   5860  * }
   5861  *
   5862  * This is a resource limit that affects how big our runtime stack can grow.
   5863  * TODO: make this a runtime-settable property of the Reader instance. */
   5864 #define UPB_MAX_MESSAGE_NESTING 64
   5865 
   5866 struct upb_descreader {
   5867   upb_sink sink;
   5868   upb_deflist defs;
   5869   upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING];
   5870   int stack_len;
   5871 
   5872   uint32_t number;
   5873   char *name;
   5874   bool saw_number;
   5875   bool saw_name;
   5876 
   5877   char *default_string;
   5878 
   5879   upb_fielddef *f;
   5880 };
   5881 
   5882 static char *upb_strndup(const char *buf, size_t n) {
   5883   char *ret = malloc(n + 1);
   5884   if (!ret) return NULL;
   5885   memcpy(ret, buf, n);
   5886   ret[n] = '\0';
   5887   return ret;
   5888 }
   5889 
   5890 /* Returns a newly allocated string that joins input strings together, for
   5891  * example:
   5892  *   join("Foo.Bar", "Baz") -> "Foo.Bar.Baz"
   5893  *   join("", "Baz") -> "Baz"
   5894  * Caller owns a ref on the returned string. */
   5895 static char *upb_join(const char *base, const char *name) {
   5896   if (!base || strlen(base) == 0) {
   5897     return upb_strdup(name);
   5898   } else {
   5899     char *ret = malloc(strlen(base) + strlen(name) + 2);
   5900     ret[0] = '\0';
   5901     strcat(ret, base);
   5902     strcat(ret, ".");
   5903     strcat(ret, name);
   5904     return ret;
   5905   }
   5906 }
   5907 
   5908 
   5909 /* upb_deflist ****************************************************************/
   5910 
   5911 void upb_deflist_init(upb_deflist *l) {
   5912   l->size = 0;
   5913   l->defs = NULL;
   5914   l->len = 0;
   5915   l->owned = true;
   5916 }
   5917 
   5918 void upb_deflist_uninit(upb_deflist *l) {
   5919   size_t i;
   5920   if (l->owned)
   5921     for(i = 0; i < l->len; i++)
   5922       upb_def_unref(l->defs[i], l);
   5923   free(l->defs);
   5924 }
   5925 
   5926 bool upb_deflist_push(upb_deflist *l, upb_def *d) {
   5927   if(++l->len >= l->size) {
   5928     size_t new_size = UPB_MAX(l->size, 4);
   5929     new_size *= 2;
   5930     l->defs = realloc(l->defs, new_size * sizeof(void *));
   5931     if (!l->defs) return false;
   5932     l->size = new_size;
   5933   }
   5934   l->defs[l->len - 1] = d;
   5935   return true;
   5936 }
   5937 
   5938 void upb_deflist_donaterefs(upb_deflist *l, void *owner) {
   5939   size_t i;
   5940   assert(l->owned);
   5941   for (i = 0; i < l->len; i++)
   5942     upb_def_donateref(l->defs[i], l, owner);
   5943   l->owned = false;
   5944 }
   5945 
   5946 static upb_def *upb_deflist_last(upb_deflist *l) {
   5947   return l->defs[l->len-1];
   5948 }
   5949 
   5950 /* Qualify the defname for all defs starting with offset "start" with "str". */
   5951 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) {
   5952   uint32_t i;
   5953   for (i = start; i < l->len; i++) {
   5954     upb_def *def = l->defs[i];
   5955     char *name = upb_join(str, upb_def_fullname(def));
   5956     upb_def_setfullname(def, name, NULL);
   5957     free(name);
   5958   }
   5959 }
   5960 
   5961 
   5962 /* upb_descreader  ************************************************************/
   5963 
   5964 static upb_msgdef *upb_descreader_top(upb_descreader *r) {
   5965   int index;
   5966   assert(r->stack_len > 1);
   5967   index = r->stack[r->stack_len-1].start - 1;
   5968   assert(index >= 0);
   5969   return upb_downcast_msgdef_mutable(r->defs.defs[index]);
   5970 }
   5971 
   5972 static upb_def *upb_descreader_last(upb_descreader *r) {
   5973   return upb_deflist_last(&r->defs);
   5974 }
   5975 
/* Start/end handlers for FileDescriptorProto and DescriptorProto (the two
 * entities that have names and can contain sub-definitions). */
   5978 void upb_descreader_startcontainer(upb_descreader *r) {
   5979   upb_descreader_frame *f = &r->stack[r->stack_len++];
   5980   f->start = r->defs.len;
   5981   f->name = NULL;
   5982 }
   5983 
   5984 void upb_descreader_endcontainer(upb_descreader *r) {
   5985   upb_descreader_frame *f = &r->stack[--r->stack_len];
   5986   upb_deflist_qualify(&r->defs, f->name, f->start);
   5987   free(f->name);
   5988   f->name = NULL;
   5989 }
   5990 
   5991 void upb_descreader_setscopename(upb_descreader *r, char *str) {
   5992   upb_descreader_frame *f = &r->stack[r->stack_len-1];
   5993   free(f->name);
   5994   f->name = str;
   5995 }
   5996 
   5997 /* Handlers for google.protobuf.FileDescriptorProto. */
   5998 static bool file_startmsg(void *r, const void *hd) {
   5999   UPB_UNUSED(hd);
   6000   upb_descreader_startcontainer(r);
   6001   return true;
   6002 }
   6003 
   6004 static bool file_endmsg(void *closure, const void *hd, upb_status *status) {
   6005   upb_descreader *r = closure;
   6006   UPB_UNUSED(hd);
   6007   UPB_UNUSED(status);
   6008   upb_descreader_endcontainer(r);
   6009   return true;
   6010 }
   6011 
   6012 static size_t file_onpackage(void *closure, const void *hd, const char *buf,
   6013                              size_t n, const upb_bufhandle *handle) {
   6014   upb_descreader *r = closure;
   6015   UPB_UNUSED(hd);
   6016   UPB_UNUSED(handle);
   6017   /* XXX: see comment at the top of the file. */
   6018   upb_descreader_setscopename(r, upb_strndup(buf, n));
   6019   return n;
   6020 }
   6021 
   6022 /* Handlers for google.protobuf.EnumValueDescriptorProto. */
   6023 static bool enumval_startmsg(void *closure, const void *hd) {
   6024   upb_descreader *r = closure;
   6025   UPB_UNUSED(hd);
   6026   r->saw_number = false;
   6027   r->saw_name = false;
   6028   return true;
   6029 }
   6030 
   6031 static size_t enumval_onname(void *closure, const void *hd, const char *buf,
   6032                              size_t n, const upb_bufhandle *handle) {
   6033   upb_descreader *r = closure;
   6034   UPB_UNUSED(hd);
   6035   UPB_UNUSED(handle);
   6036   /* XXX: see comment at the top of the file. */
   6037   free(r->name);
   6038   r->name = upb_strndup(buf, n);
   6039   r->saw_name = true;
   6040   return n;
   6041 }
   6042 
   6043 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) {
   6044   upb_descreader *r = closure;
   6045   UPB_UNUSED(hd);
   6046   r->number = val;
   6047   r->saw_number = true;
   6048   return true;
   6049 }
   6050 
   6051 static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) {
   6052   upb_descreader *r = closure;
   6053   upb_enumdef *e;
   6054   UPB_UNUSED(hd);
   6055 
   6056   if(!r->saw_number || !r->saw_name) {
   6057     upb_status_seterrmsg(status, "Enum value missing name or number.");
   6058     return false;
   6059   }
   6060   e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
   6061   upb_enumdef_addval(e, r->name, r->number, status);
   6062   free(r->name);
   6063   r->name = NULL;
   6064   return true;
   6065 }
   6066 
   6067 
   6068 /* Handlers for google.protobuf.EnumDescriptorProto. */
   6069 static bool enum_startmsg(void *closure, const void *hd) {
   6070   upb_descreader *r = closure;
   6071   UPB_UNUSED(hd);
   6072   upb_deflist_push(&r->defs,
   6073                    upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs)));
   6074   return true;
   6075 }
   6076 
   6077 static bool enum_endmsg(void *closure, const void *hd, upb_status *status) {
   6078   upb_descreader *r = closure;
   6079   upb_enumdef *e;
   6080   UPB_UNUSED(hd);
   6081 
   6082   e = upb_downcast_enumdef_mutable(upb_descreader_last(r));
   6083   if (upb_def_fullname(upb_descreader_last(r)) == NULL) {
   6084     upb_status_seterrmsg(status, "Enum had no name.");
   6085     return false;
   6086   }
   6087   if (upb_enumdef_numvals(e) == 0) {
   6088     upb_status_seterrmsg(status, "Enum had no values.");
   6089     return false;
   6090   }
   6091   return true;
   6092 }
   6093 
   6094 static size_t enum_onname(void *closure, const void *hd, const char *buf,
   6095                           size_t n, const upb_bufhandle *handle) {
   6096   upb_descreader *r = closure;
   6097   char *fullname = upb_strndup(buf, n);
   6098   UPB_UNUSED(hd);
   6099   UPB_UNUSED(handle);
   6100   /* XXX: see comment at the top of the file. */
   6101   upb_def_setfullname(upb_descreader_last(r), fullname, NULL);
   6102   free(fullname);
   6103   return n;
   6104 }
   6105 
   6106 /* Handlers for google.protobuf.FieldDescriptorProto */
   6107 static bool field_startmsg(void *closure, const void *hd) {
   6108   upb_descreader *r = closure;
   6109   UPB_UNUSED(hd);
   6110   r->f = upb_fielddef_new(&r->defs);
   6111   free(r->default_string);
   6112   r->default_string = NULL;
   6113 
   6114   /* fielddefs default to packed, but descriptors default to non-packed. */
   6115   upb_fielddef_setpacked(r->f, false);
   6116   return true;
   6117 }
   6118 
   6119 /* Converts the default value in string "str" into "d".  Passes a ref on str.
   6120  * Returns true on success. */
   6121 static bool parse_default(char *str, upb_fielddef *f) {
   6122   bool success = true;
   6123   char *end;
   6124   switch (upb_fielddef_type(f)) {
   6125     case UPB_TYPE_INT32: {
   6126       long val = strtol(str, &end, 0);
   6127       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end)
   6128         success = false;
   6129       else
   6130         upb_fielddef_setdefaultint32(f, val);
   6131       break;
   6132     }
   6133     case UPB_TYPE_INT64: {
   6134       /* XXX: Need to write our own strtoll, since it's not available in c89. */
   6135       long long val = strtol(str, &end, 0);
   6136       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end)
   6137         success = false;
   6138       else
   6139         upb_fielddef_setdefaultint64(f, val);
   6140       break;
   6141     }
   6142     case UPB_TYPE_UINT32: {
   6143       unsigned long val = strtoul(str, &end, 0);
   6144       if (val > UINT32_MAX || errno == ERANGE || *end)
   6145         success = false;
   6146       else
   6147         upb_fielddef_setdefaultuint32(f, val);
   6148       break;
   6149     }
   6150     case UPB_TYPE_UINT64: {
   6151       /* XXX: Need to write our own strtoull, since it's not available in c89. */
   6152       unsigned long long val = strtoul(str, &end, 0);
   6153       if (val > UINT64_MAX || errno == ERANGE || *end)
   6154         success = false;
   6155       else
   6156         upb_fielddef_setdefaultuint64(f, val);
   6157       break;
   6158     }
   6159     case UPB_TYPE_DOUBLE: {
   6160       double val = strtod(str, &end);
   6161       if (errno == ERANGE || *end)
   6162         success = false;
   6163       else
   6164         upb_fielddef_setdefaultdouble(f, val);
   6165       break;
   6166     }
   6167     case UPB_TYPE_FLOAT: {
   6168       /* XXX: Need to write our own strtof, since it's not available in c89. */
   6169       float val = strtod(str, &end);
   6170       if (errno == ERANGE || *end)
   6171         success = false;
   6172       else
   6173         upb_fielddef_setdefaultfloat(f, val);
   6174       break;
   6175     }
   6176     case UPB_TYPE_BOOL: {
   6177       if (strcmp(str, "false") == 0)
   6178         upb_fielddef_setdefaultbool(f, false);
   6179       else if (strcmp(str, "true") == 0)
   6180         upb_fielddef_setdefaultbool(f, true);
   6181       else
   6182         success = false;
   6183       break;
   6184     }
   6185     default: abort();
   6186   }
   6187   return success;
   6188 }
   6189 
   6190 static bool field_endmsg(void *closure, const void *hd, upb_status *status) {
   6191   upb_descreader *r = closure;
   6192   upb_fielddef *f = r->f;
   6193   UPB_UNUSED(hd);
   6194 
   6195   /* TODO: verify that all required fields were present. */
   6196   assert(upb_fielddef_number(f) != 0);
   6197   assert(upb_fielddef_name(f) != NULL);
   6198   assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f));
   6199 
   6200   if (r->default_string) {
   6201     if (upb_fielddef_issubmsg(f)) {
   6202       upb_status_seterrmsg(status, "Submessages cannot have defaults.");
   6203       return false;
   6204     }
   6205     if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) {
   6206       upb_fielddef_setdefaultcstr(f, r->default_string, NULL);
   6207     } else {
   6208       if (r->default_string && !parse_default(r->default_string, f)) {
   6209         /* We don't worry too much about giving a great error message since the
   6210          * compiler should have ensured this was correct. */
   6211         upb_status_seterrmsg(status, "Error converting default value.");
   6212         return false;
   6213       }
   6214     }
   6215   }
   6216   return true;
   6217 }
   6218 
   6219 static bool field_onlazy(void *closure, const void *hd, bool val) {
   6220   upb_descreader *r = closure;
   6221   UPB_UNUSED(hd);
   6222 
   6223   upb_fielddef_setlazy(r->f, val);
   6224   return true;
   6225 }
   6226 
   6227 static bool field_onpacked(void *closure, const void *hd, bool val) {
   6228   upb_descreader *r = closure;
   6229   UPB_UNUSED(hd);
   6230 
   6231   upb_fielddef_setpacked(r->f, val);
   6232   return true;
   6233 }
   6234 
   6235 static bool field_ontype(void *closure, const void *hd, int32_t val) {
   6236   upb_descreader *r = closure;
   6237   UPB_UNUSED(hd);
   6238 
   6239   upb_fielddef_setdescriptortype(r->f, val);
   6240   return true;
   6241 }
   6242 
   6243 static bool field_onlabel(void *closure, const void *hd, int32_t val) {
   6244   upb_descreader *r = closure;
   6245   UPB_UNUSED(hd);
   6246 
   6247   upb_fielddef_setlabel(r->f, val);
   6248   return true;
   6249 }
   6250 
   6251 static bool field_onnumber(void *closure, const void *hd, int32_t val) {
   6252   upb_descreader *r = closure;
   6253   bool ok = upb_fielddef_setnumber(r->f, val, NULL);
   6254   UPB_UNUSED(hd);
   6255 
   6256   UPB_ASSERT_VAR(ok, ok);
   6257   return true;
   6258 }
   6259 
   6260 static size_t field_onname(void *closure, const void *hd, const char *buf,
   6261                            size_t n, const upb_bufhandle *handle) {
   6262   upb_descreader *r = closure;
   6263   char *name = upb_strndup(buf, n);
   6264   UPB_UNUSED(hd);
   6265   UPB_UNUSED(handle);
   6266 
   6267   /* XXX: see comment at the top of the file. */
   6268   upb_fielddef_setname(r->f, name, NULL);
   6269   free(name);
   6270   return n;
   6271 }
   6272 
   6273 static size_t field_ontypename(void *closure, const void *hd, const char *buf,
   6274                                size_t n, const upb_bufhandle *handle) {
   6275   upb_descreader *r = closure;
   6276   char *name = upb_strndup(buf, n);
   6277   UPB_UNUSED(hd);
   6278   UPB_UNUSED(handle);
   6279 
   6280   /* XXX: see comment at the top of the file. */
   6281   upb_fielddef_setsubdefname(r->f, name, NULL);
   6282   free(name);
   6283   return n;
   6284 }
   6285 
   6286 static size_t field_onextendee(void *closure, const void *hd, const char *buf,
   6287                                size_t n, const upb_bufhandle *handle) {
   6288   upb_descreader *r = closure;
   6289   char *name = upb_strndup(buf, n);
   6290   UPB_UNUSED(hd);
   6291   UPB_UNUSED(handle);
   6292 
   6293   /* XXX: see comment at the top of the file. */
   6294   upb_fielddef_setcontainingtypename(r->f, name, NULL);
   6295   free(name);
   6296   return n;
   6297 }
   6298 
   6299 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf,
   6300                                  size_t n, const upb_bufhandle *handle) {
   6301   upb_descreader *r = closure;
   6302   UPB_UNUSED(hd);
   6303   UPB_UNUSED(handle);
   6304 
   6305   /* Have to convert from string to the correct type, but we might not know the
   6306    * type yet, so we save it as a string until the end of the field.
   6307    * XXX: see comment at the top of the file. */
   6308   free(r->default_string);
   6309   r->default_string = upb_strndup(buf, n);
   6310   return n;
   6311 }
   6312 
   6313 /* Handlers for google.protobuf.DescriptorProto (representing a message). */
   6314 static bool msg_startmsg(void *closure, const void *hd) {
   6315   upb_descreader *r = closure;
   6316   UPB_UNUSED(hd);
   6317 
   6318   upb_deflist_push(&r->defs,
   6319                    upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs)));
   6320   upb_descreader_startcontainer(r);
   6321   return true;
   6322 }
   6323 
   6324 static bool msg_endmsg(void *closure, const void *hd, upb_status *status) {
   6325   upb_descreader *r = closure;
   6326   upb_msgdef *m = upb_descreader_top(r);
   6327   UPB_UNUSED(hd);
   6328 
   6329   if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) {
   6330     upb_status_seterrmsg(status, "Encountered message with no name.");
   6331     return false;
   6332   }
   6333   upb_descreader_endcontainer(r);
   6334   return true;
   6335 }
   6336 
   6337 static size_t msg_onname(void *closure, const void *hd, const char *buf,
   6338                          size_t n, const upb_bufhandle *handle) {
   6339   upb_descreader *r = closure;
   6340   upb_msgdef *m = upb_descreader_top(r);
   6341   /* XXX: see comment at the top of the file. */
   6342   char *name = upb_strndup(buf, n);
   6343   UPB_UNUSED(hd);
   6344   UPB_UNUSED(handle);
   6345 
   6346   upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL);
   6347   upb_descreader_setscopename(r, name);  /* Passes ownership of name. */
   6348   return n;
   6349 }
   6350 
   6351 static bool msg_onendfield(void *closure, const void *hd) {
   6352   upb_descreader *r = closure;
   6353   upb_msgdef *m = upb_descreader_top(r);
   6354   UPB_UNUSED(hd);
   6355 
   6356   upb_msgdef_addfield(m, r->f, &r->defs, NULL);
   6357   r->f = NULL;
   6358   return true;
   6359 }
   6360 
   6361 static bool pushextension(void *closure, const void *hd) {
   6362   upb_descreader *r = closure;
   6363   UPB_UNUSED(hd);
   6364 
   6365   assert(upb_fielddef_containingtypename(r->f));
   6366   upb_fielddef_setisextension(r->f, true);
   6367   upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f));
   6368   r->f = NULL;
   6369   return true;
   6370 }
   6371 
   6372 #define D(name) upbdefs_google_protobuf_ ## name(s)
   6373 
   6374 static void reghandlers(const void *closure, upb_handlers *h) {
   6375   const upb_symtab *s = closure;
   6376   const upb_msgdef *m = upb_handlers_msgdef(h);
   6377 
   6378   if (m == D(DescriptorProto)) {
   6379     upb_handlers_setstartmsg(h, &msg_startmsg, NULL);
   6380     upb_handlers_setendmsg(h, &msg_endmsg, NULL);
   6381     upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL);
   6382     upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield,
   6383                               NULL);
   6384     upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension,
   6385                               NULL);
   6386   } else if (m == D(FileDescriptorProto)) {
   6387     upb_handlers_setstartmsg(h, &file_startmsg, NULL);
   6388     upb_handlers_setendmsg(h, &file_endmsg, NULL);
   6389     upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage,
   6390                            NULL);
   6391     upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension,
   6392                               NULL);
   6393   } else if (m == D(EnumValueDescriptorProto)) {
   6394     upb_handlers_setstartmsg(h, &enumval_startmsg, NULL);
   6395     upb_handlers_setendmsg(h, &enumval_endmsg, NULL);
   6396     upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL);
   6397     upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber,
   6398                           NULL);
   6399   } else if (m == D(EnumDescriptorProto)) {
   6400     upb_handlers_setstartmsg(h, &enum_startmsg, NULL);
   6401     upb_handlers_setendmsg(h, &enum_endmsg, NULL);
   6402     upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL);
   6403   } else if (m == D(FieldDescriptorProto)) {
   6404     upb_handlers_setstartmsg(h, &field_startmsg, NULL);
   6405     upb_handlers_setendmsg(h, &field_endmsg, NULL);
   6406     upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype,
   6407                           NULL);
   6408     upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel,
   6409                           NULL);
   6410     upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber,
   6411                           NULL);
   6412     upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname,
   6413                            NULL);
   6414     upb_handlers_setstring(h, D(FieldDescriptorProto_type_name),
   6415                            &field_ontypename, NULL);
   6416     upb_handlers_setstring(h, D(FieldDescriptorProto_extendee),
   6417                            &field_onextendee, NULL);
   6418     upb_handlers_setstring(h, D(FieldDescriptorProto_default_value),
   6419                            &field_ondefaultval, NULL);
   6420   } else if (m == D(FieldOptions)) {
   6421     upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL);
   6422     upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL);
   6423   }
   6424 }
   6425 
   6426 #undef D
   6427 
   6428 void descreader_cleanup(void *_r) {
   6429   upb_descreader *r = _r;
   6430   free(r->name);
   6431   upb_deflist_uninit(&r->defs);
   6432   free(r->default_string);
   6433   while (r->stack_len > 0) {
   6434     upb_descreader_frame *f = &r->stack[--r->stack_len];
   6435     free(f->name);
   6436   }
   6437 }
   6438 
   6439 
   6440 /* Public API  ****************************************************************/
   6441 
   6442 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) {
   6443   upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader));
   6444   if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) {
   6445     return NULL;
   6446   }
   6447 
   6448   upb_deflist_init(&r->defs);
   6449   upb_sink_reset(upb_descreader_input(r), h, r);
   6450   r->stack_len = 0;
   6451   r->name = NULL;
   6452   r->default_string = NULL;
   6453 
   6454   return r;
   6455 }
   6456 
   6457 upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) {
   6458   *n = r->defs.len;
   6459   upb_deflist_donaterefs(&r->defs, owner);
   6460   return r->defs.defs;
   6461 }
   6462 
   6463 upb_sink *upb_descreader_input(upb_descreader *r) {
   6464   return &r->sink;
   6465 }
   6466 
   6467 const upb_handlers *upb_descreader_newhandlers(const void *owner) {
   6468   const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s);
   6469   const upb_handlers *h = upb_handlers_newfrozen(
   6470       upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s);
   6471   upb_symtab_unref(s, &s);
   6472   return h;
   6473 }
   6474 /*
   6475 ** protobuf decoder bytecode compiler
   6476 **
   6477 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf
   6478 ** according to that specific schema and destination handlers.
   6479 **
   6480 ** Compiling to bytecode is always the first step.  If we are using the
   6481 ** interpreted decoder we leave it as bytecode and interpret that.  If we are
   6482 ** using a JIT decoder we use a code generator to turn the bytecode into native
   6483 ** code, LLVM IR, etc.
   6484 **
   6485 ** Bytecode definition is in decoder.int.h.
   6486 */
   6487 
   6488 #include <stdarg.h>
   6489 
   6490 #ifdef UPB_DUMP_BYTECODE
   6491 #include <stdio.h>
   6492 #endif
   6493 
   6494 #define MAXLABEL 5
   6495 #define EMPTYLABEL -1
   6496 
   6497 /* mgroup *********************************************************************/
   6498 
   6499 static void freegroup(upb_refcounted *r) {
   6500   mgroup *g = (mgroup*)r;
   6501   upb_inttable_uninit(&g->methods);
   6502 #ifdef UPB_USE_JIT_X64
   6503   upb_pbdecoder_freejit(g);
   6504 #endif
   6505   free(g->bytecode);
   6506   free(g);
   6507 }
   6508 
   6509 static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit,
   6510                        void *closure) {
   6511   const mgroup *g = (const mgroup*)r;
   6512   upb_inttable_iter i;
   6513   upb_inttable_begin(&i, &g->methods);
   6514   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   6515     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
   6516     visit(r, upb_pbdecodermethod_upcast(method), closure);
   6517   }
   6518 }
   6519 
   6520 mgroup *newgroup(const void *owner) {
   6521   mgroup *g = malloc(sizeof(*g));
   6522   static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup};
   6523   upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner);
   6524   upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
   6525   g->bytecode = NULL;
   6526   g->bytecode_end = NULL;
   6527   return g;
   6528 }
   6529 
   6530 
   6531 /* upb_pbdecodermethod ********************************************************/
   6532 
   6533 static void freemethod(upb_refcounted *r) {
   6534   upb_pbdecodermethod *method = (upb_pbdecodermethod*)r;
   6535 
   6536   if (method->dest_handlers_) {
   6537     upb_handlers_unref(method->dest_handlers_, method);
   6538   }
   6539 
   6540   upb_inttable_uninit(&method->dispatch);
   6541   free(method);
   6542 }
   6543 
   6544 static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit,
   6545                         void *closure) {
   6546   const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r;
   6547   visit(r, m->group, closure);
   6548 }
   6549 
   6550 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
   6551                                       mgroup *group) {
   6552   static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod};
   6553   upb_pbdecodermethod *ret = malloc(sizeof(*ret));
   6554   upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret);
   6555   upb_byteshandler_init(&ret->input_handler_);
   6556 
   6557   /* The method references the group and vice-versa, in a circular reference. */
   6558   upb_ref2(ret, group);
   6559   upb_ref2(group, ret);
   6560   upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret));
   6561   upb_pbdecodermethod_unref(ret, &ret);
   6562 
   6563   ret->group = mgroup_upcast_mutable(group);
   6564   ret->dest_handlers_ = dest_handlers;
   6565   ret->is_native_ = false;  /* If we JIT, it will update this later. */
   6566   upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
   6567 
   6568   if (ret->dest_handlers_) {
   6569     upb_handlers_ref(ret->dest_handlers_, ret);
   6570   }
   6571   return ret;
   6572 }
   6573 
   6574 const upb_handlers *upb_pbdecodermethod_desthandlers(
   6575     const upb_pbdecodermethod *m) {
   6576   return m->dest_handlers_;
   6577 }
   6578 
   6579 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
   6580     const upb_pbdecodermethod *m) {
   6581   return &m->input_handler_;
   6582 }
   6583 
   6584 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
   6585   return m->is_native_;
   6586 }
   6587 
   6588 const upb_pbdecodermethod *upb_pbdecodermethod_new(
   6589     const upb_pbdecodermethodopts *opts, const void *owner) {
   6590   const upb_pbdecodermethod *ret;
   6591   upb_pbcodecache cache;
   6592 
   6593   upb_pbcodecache_init(&cache);
   6594   ret = upb_pbcodecache_getdecodermethod(&cache, opts);
   6595   upb_pbdecodermethod_ref(ret, owner);
   6596   upb_pbcodecache_uninit(&cache);
   6597   return ret;
   6598 }
   6599 
   6600 
   6601 /* bytecode compiler **********************************************************/
   6602 
   6603 /* Data used only at compilation time. */
   6604 typedef struct {
   6605   mgroup *group;
   6606 
   6607   uint32_t *pc;
   6608   int fwd_labels[MAXLABEL];
   6609   int back_labels[MAXLABEL];
   6610 
   6611   /* For fields marked "lazy", parse them lazily or eagerly? */
   6612   bool lazy;
   6613 } compiler;
   6614 
   6615 static compiler *newcompiler(mgroup *group, bool lazy) {
   6616   compiler *ret = malloc(sizeof(*ret));
   6617   int i;
   6618 
   6619   ret->group = group;
   6620   ret->lazy = lazy;
   6621   for (i = 0; i < MAXLABEL; i++) {
   6622     ret->fwd_labels[i] = EMPTYLABEL;
   6623     ret->back_labels[i] = EMPTYLABEL;
   6624   }
   6625   return ret;
   6626 }
   6627 
   6628 static void freecompiler(compiler *c) {
   6629   free(c);
   6630 }
   6631 
   6632 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
   6633 
   6634 /* How many words an instruction is. */
   6635 static int instruction_len(uint32_t instr) {
   6636   switch (getop(instr)) {
   6637     case OP_SETDISPATCH: return 1 + ptr_words;
   6638     case OP_TAGN: return 3;
   6639     case OP_SETBIGGROUPNUM: return 2;
   6640     default: return 1;
   6641   }
   6642 }
   6643 
   6644 bool op_has_longofs(int32_t instruction) {
   6645   switch (getop(instruction)) {
   6646     case OP_CALL:
   6647     case OP_BRANCH:
   6648     case OP_CHECKDELIM:
   6649       return true;
   6650     /* The "tag" instructions only have 8 bytes available for the jump target,
   6651      * but that is ok because these opcodes only require short jumps. */
   6652     case OP_TAG1:
   6653     case OP_TAG2:
   6654     case OP_TAGN:
   6655       return false;
   6656     default:
   6657       assert(false);
   6658       return false;
   6659   }
   6660 }
   6661 
   6662 static int32_t getofs(uint32_t instruction) {
   6663   if (op_has_longofs(instruction)) {
   6664     return (int32_t)instruction >> 8;
   6665   } else {
   6666     return (int8_t)(instruction >> 8);
   6667   }
   6668 }
   6669 
   6670 static void setofs(uint32_t *instruction, int32_t ofs) {
   6671   if (op_has_longofs(*instruction)) {
   6672     *instruction = getop(*instruction) | ofs << 8;
   6673   } else {
   6674     *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8);
   6675   }
   6676   assert(getofs(*instruction) == ofs);  /* Would fail in cases of overflow. */
   6677 }
   6678 
   6679 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; }
   6680 
   6681 /* Defines a local label at the current PC location.  All previous forward
   6682  * references are updated to point to this location.  The location is noted
   6683  * for any future backward references. */
   6684 static void label(compiler *c, unsigned int label) {
   6685   int val;
   6686   uint32_t *codep;
   6687 
   6688   assert(label < MAXLABEL);
   6689   val = c->fwd_labels[label];
   6690   codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
   6691   while (codep) {
   6692     int ofs = getofs(*codep);
   6693     setofs(codep, c->pc - codep - instruction_len(*codep));
   6694     codep = ofs ? codep + ofs : NULL;
   6695   }
   6696   c->fwd_labels[label] = EMPTYLABEL;
   6697   c->back_labels[label] = pcofs(c);
   6698 }
   6699 
   6700 /* Creates a reference to a numbered label; either a forward reference
   6701  * (positive arg) or backward reference (negative arg).  For forward references
   6702  * the value returned now is actually a "next" pointer into a linked list of all
   6703  * instructions that use this label and will be patched later when the label is
   6704  * defined with label().
   6705  *
   6706  * The returned value is the offset that should be written into the instruction.
   6707  */
   6708 static int32_t labelref(compiler *c, int label) {
   6709   assert(label < MAXLABEL);
   6710   if (label == LABEL_DISPATCH) {
   6711     /* No resolving required. */
   6712     return 0;
   6713   } else if (label < 0) {
   6714     /* Backward local label.  Relative to the next instruction. */
   6715     uint32_t from = (c->pc + 1) - c->group->bytecode;
   6716     return c->back_labels[-label] - from;
   6717   } else {
   6718     /* Forward local label: prepend to (possibly-empty) linked list. */
   6719     int *lptr = &c->fwd_labels[label];
   6720     int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
   6721     *lptr = pcofs(c);
   6722     return ret;
   6723   }
   6724 }
   6725 
   6726 static void put32(compiler *c, uint32_t v) {
   6727   mgroup *g = c->group;
   6728   if (c->pc == g->bytecode_end) {
   6729     int ofs = pcofs(c);
   6730     size_t oldsize = g->bytecode_end - g->bytecode;
   6731     size_t newsize = UPB_MAX(oldsize * 2, 64);
   6732     /* TODO(haberman): handle OOM. */
   6733     g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t));
   6734     g->bytecode_end = g->bytecode + newsize;
   6735     c->pc = g->bytecode + ofs;
   6736   }
   6737   *c->pc++ = v;
   6738 }
   6739 
   6740 static void putop(compiler *c, opcode op, ...) {
   6741   va_list ap;
   6742   va_start(ap, op);
   6743 
   6744   switch (op) {
   6745     case OP_SETDISPATCH: {
   6746       uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
   6747       put32(c, OP_SETDISPATCH);
   6748       put32(c, ptr);
   6749       if (sizeof(uintptr_t) > sizeof(uint32_t))
   6750         put32(c, (uint64_t)ptr >> 32);
   6751       break;
   6752     }
   6753     case OP_STARTMSG:
   6754     case OP_ENDMSG:
   6755     case OP_PUSHLENDELIM:
   6756     case OP_POP:
   6757     case OP_SETDELIM:
   6758     case OP_HALT:
   6759     case OP_RET:
   6760     case OP_DISPATCH:
   6761       put32(c, op);
   6762       break;
   6763     case OP_PARSE_DOUBLE:
   6764     case OP_PARSE_FLOAT:
   6765     case OP_PARSE_INT64:
   6766     case OP_PARSE_UINT64:
   6767     case OP_PARSE_INT32:
   6768     case OP_PARSE_FIXED64:
   6769     case OP_PARSE_FIXED32:
   6770     case OP_PARSE_BOOL:
   6771     case OP_PARSE_UINT32:
   6772     case OP_PARSE_SFIXED32:
   6773     case OP_PARSE_SFIXED64:
   6774     case OP_PARSE_SINT32:
   6775     case OP_PARSE_SINT64:
   6776     case OP_STARTSEQ:
   6777     case OP_ENDSEQ:
   6778     case OP_STARTSUBMSG:
   6779     case OP_ENDSUBMSG:
   6780     case OP_STARTSTR:
   6781     case OP_STRING:
   6782     case OP_ENDSTR:
   6783     case OP_PUSHTAGDELIM:
   6784       put32(c, op | va_arg(ap, upb_selector_t) << 8);
   6785       break;
   6786     case OP_SETBIGGROUPNUM:
   6787       put32(c, op);
   6788       put32(c, va_arg(ap, int));
   6789       break;
   6790     case OP_CALL: {
   6791       const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
   6792       put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
   6793       break;
   6794     }
   6795     case OP_CHECKDELIM:
   6796     case OP_BRANCH: {
   6797       uint32_t instruction = op;
   6798       int label = va_arg(ap, int);
   6799       setofs(&instruction, labelref(c, label));
   6800       put32(c, instruction);
   6801       break;
   6802     }
   6803     case OP_TAG1:
   6804     case OP_TAG2: {
   6805       int label = va_arg(ap, int);
   6806       uint64_t tag = va_arg(ap, uint64_t);
   6807       uint32_t instruction = op | (tag << 16);
   6808       assert(tag <= 0xffff);
   6809       setofs(&instruction, labelref(c, label));
   6810       put32(c, instruction);
   6811       break;
   6812     }
   6813     case OP_TAGN: {
   6814       int label = va_arg(ap, int);
   6815       uint64_t tag = va_arg(ap, uint64_t);
   6816       uint32_t instruction = op | (upb_value_size(tag) << 16);
   6817       setofs(&instruction, labelref(c, label));
   6818       put32(c, instruction);
   6819       put32(c, tag);
   6820       put32(c, tag >> 32);
   6821       break;
   6822     }
   6823   }
   6824 
   6825   va_end(ap);
   6826 }
   6827 
   6828 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE)
   6829 
   6830 const char *upb_pbdecoder_getopname(unsigned int op) {
   6831 #define QUOTE(x) #x
   6832 #define EXPAND_AND_QUOTE(x) QUOTE(x)
   6833 #define OPNAME(x) OP_##x
   6834 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
   6835 #define T(x) OP(PARSE_##x)
   6836   /* Keep in sync with list in decoder.int.h. */
   6837   switch ((opcode)op) {
   6838     T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
   6839     T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
   6840     OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
   6841     OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
   6842     OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
   6843     OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
   6844     OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
   6845   }
   6846   return "<unknown op>";
   6847 #undef OP
   6848 #undef T
   6849 }
   6850 
   6851 #endif
   6852 
   6853 #ifdef UPB_DUMP_BYTECODE
   6854 
   6855 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
   6856 
   6857   uint32_t *begin = p;
   6858 
   6859   while (p < end) {
   6860     fprintf(f, "%p  %8tx", p, p - begin);
   6861     uint32_t instr = *p++;
   6862     uint8_t op = getop(instr);
   6863     fprintf(f, " %s", upb_pbdecoder_getopname(op));
   6864     switch ((opcode)op) {
   6865       case OP_SETDISPATCH: {
   6866         const upb_inttable *dispatch;
   6867         memcpy(&dispatch, p, sizeof(void*));
   6868         p += ptr_words;
   6869         const upb_pbdecodermethod *method =
   6870             (void *)((char *)dispatch -
   6871                      offsetof(upb_pbdecodermethod, dispatch));
   6872         fprintf(f, " %s", upb_msgdef_fullname(
   6873                               upb_handlers_msgdef(method->dest_handlers_)));
   6874         break;
   6875       }
   6876       case OP_DISPATCH:
   6877       case OP_STARTMSG:
   6878       case OP_ENDMSG:
   6879       case OP_PUSHLENDELIM:
   6880       case OP_POP:
   6881       case OP_SETDELIM:
   6882       case OP_HALT:
   6883       case OP_RET:
   6884         break;
   6885       case OP_PARSE_DOUBLE:
   6886       case OP_PARSE_FLOAT:
   6887       case OP_PARSE_INT64:
   6888       case OP_PARSE_UINT64:
   6889       case OP_PARSE_INT32:
   6890       case OP_PARSE_FIXED64:
   6891       case OP_PARSE_FIXED32:
   6892       case OP_PARSE_BOOL:
   6893       case OP_PARSE_UINT32:
   6894       case OP_PARSE_SFIXED32:
   6895       case OP_PARSE_SFIXED64:
   6896       case OP_PARSE_SINT32:
   6897       case OP_PARSE_SINT64:
   6898       case OP_STARTSEQ:
   6899       case OP_ENDSEQ:
   6900       case OP_STARTSUBMSG:
   6901       case OP_ENDSUBMSG:
   6902       case OP_STARTSTR:
   6903       case OP_STRING:
   6904       case OP_ENDSTR:
   6905       case OP_PUSHTAGDELIM:
   6906         fprintf(f, " %d", instr >> 8);
   6907         break;
   6908       case OP_SETBIGGROUPNUM:
   6909         fprintf(f, " %d", *p++);
   6910         break;
   6911       case OP_CHECKDELIM:
   6912       case OP_CALL:
   6913       case OP_BRANCH:
   6914         fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
   6915         break;
   6916       case OP_TAG1:
   6917       case OP_TAG2: {
   6918         fprintf(f, " tag:0x%x", instr >> 16);
   6919         if (getofs(instr)) {
   6920           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
   6921         }
   6922         break;
   6923       }
   6924       case OP_TAGN: {
   6925         uint64_t tag = *p++;
   6926         tag |= (uint64_t)*p++ << 32;
   6927         fprintf(f, " tag:0x%llx", (long long)tag);
   6928         fprintf(f, " n:%d", instr >> 16);
   6929         if (getofs(instr)) {
   6930           fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
   6931         }
   6932         break;
   6933       }
   6934     }
   6935     fputs("\n", f);
   6936   }
   6937 }
   6938 
   6939 #endif
   6940 
   6941 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
   6942   uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
   6943   uint64_t encoded_tag = upb_vencode32(tag);
   6944   /* No tag should be greater than 5 bytes. */
   6945   assert(encoded_tag <= 0xffffffffff);
   6946   return encoded_tag;
   6947 }
   6948 
   6949 static void putchecktag(compiler *c, const upb_fielddef *f,
   6950                         int wire_type, int dest) {
   6951   uint64_t tag = get_encoded_tag(f, wire_type);
   6952   switch (upb_value_size(tag)) {
   6953     case 1:
   6954       putop(c, OP_TAG1, dest, tag);
   6955       break;
   6956     case 2:
   6957       putop(c, OP_TAG2, dest, tag);
   6958       break;
   6959     default:
   6960       putop(c, OP_TAGN, dest, tag);
   6961       break;
   6962   }
   6963 }
   6964 
   6965 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
   6966   upb_selector_t selector;
   6967   bool ok = upb_handlers_getselector(f, type, &selector);
   6968   UPB_ASSERT_VAR(ok, ok);
   6969   return selector;
   6970 }
   6971 
   6972 /* Takes an existing, primary dispatch table entry and repacks it with a
   6973  * different alternate wire type.  Called when we are inserting a secondary
   6974  * dispatch table entry for an alternate wire type. */
   6975 static uint64_t repack(uint64_t dispatch, int new_wt2) {
   6976   uint64_t ofs;
   6977   uint8_t wt1;
   6978   uint8_t old_wt2;
   6979   upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
   6980   assert(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
   6981   return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
   6982 }
   6983 
   6984 /* Marks the current bytecode position as the dispatch target for this message,
   6985  * field, and wire type. */
   6986 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
   6987                            const upb_fielddef *f, int wire_type) {
   6988   /* Offset is relative to msg base. */
   6989   uint64_t ofs = pcofs(c) - method->code_base.ofs;
   6990   uint32_t fn = upb_fielddef_number(f);
   6991   upb_inttable *d = &method->dispatch;
   6992   upb_value v;
   6993   if (upb_inttable_remove(d, fn, &v)) {
   6994     /* TODO: prioritize based on packed setting in .proto file. */
   6995     uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
   6996     upb_inttable_insert(d, fn, upb_value_uint64(repacked));
   6997     upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
   6998   } else {
   6999     uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
   7000     upb_inttable_insert(d, fn, upb_value_uint64(val));
   7001   }
   7002 }
   7003 
   7004 static void putpush(compiler *c, const upb_fielddef *f) {
   7005   if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
   7006     putop(c, OP_PUSHLENDELIM);
   7007   } else {
   7008     uint32_t fn = upb_fielddef_number(f);
   7009     if (fn >= 1 << 24) {
   7010       putop(c, OP_PUSHTAGDELIM, 0);
   7011       putop(c, OP_SETBIGGROUPNUM, fn);
   7012     } else {
   7013       putop(c, OP_PUSHTAGDELIM, fn);
   7014     }
   7015   }
   7016 }
   7017 
   7018 static upb_pbdecodermethod *find_submethod(const compiler *c,
   7019                                            const upb_pbdecodermethod *method,
   7020                                            const upb_fielddef *f) {
   7021   const upb_handlers *sub =
   7022       upb_handlers_getsubhandlers(method->dest_handlers_, f);
   7023   upb_value v;
   7024   return upb_inttable_lookupptr(&c->group->methods, sub, &v)
   7025              ? upb_value_getptr(v)
   7026              : NULL;
   7027 }
   7028 
   7029 static void putsel(compiler *c, opcode op, upb_selector_t sel,
   7030                    const upb_handlers *h) {
   7031   if (upb_handlers_gethandler(h, sel)) {
   7032     putop(c, op, sel);
   7033   }
   7034 }
   7035 
   7036 /* Puts an opcode to call a callback, but only if a callback actually exists for
   7037  * this field and handler type. */
   7038 static void maybeput(compiler *c, opcode op, const upb_handlers *h,
   7039                      const upb_fielddef *f, upb_handlertype_t type) {
   7040   putsel(c, op, getsel(f, type), h);
   7041 }
   7042 
   7043 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
   7044   if (!upb_fielddef_lazy(f))
   7045     return false;
   7046 
   7047   return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) ||
   7048          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) ||
   7049          upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR));
   7050 }
   7051 
   7052 
   7053 /* bytecode compiler code generation ******************************************/
   7054 
   7055 /* Symbolic names for our local labels. */
   7056 #define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
   7057 #define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
   7058 #define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
   7059 #define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
   7060 
   7061 /* Generates bytecode to parse a single non-lazy message field. */
   7062 static void generate_msgfield(compiler *c, const upb_fielddef *f,
   7063                               upb_pbdecodermethod *method) {
   7064   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
   7065   const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
   7066   int wire_type;
   7067 
   7068   if (!sub_m) {
   7069     /* Don't emit any code for this field at all; it will be parsed as an
   7070      * unknown field. */
   7071     return;
   7072   }
   7073 
   7074   label(c, LABEL_FIELD);
   7075 
   7076   wire_type =
   7077       (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
   7078           ? UPB_WIRE_TYPE_DELIMITED
   7079           : UPB_WIRE_TYPE_START_GROUP;
   7080 
   7081   if (upb_fielddef_isseq(f)) {
   7082     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7083     putchecktag(c, f, wire_type, LABEL_DISPATCH);
   7084    dispatchtarget(c, method, f, wire_type);
   7085     putop(c, OP_PUSHTAGDELIM, 0);
   7086     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
   7087    label(c, LABEL_LOOPSTART);
   7088     putpush(c, f);
   7089     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
   7090     putop(c, OP_CALL, sub_m);
   7091     putop(c, OP_POP);
   7092     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
   7093     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
   7094       putop(c, OP_SETDELIM);
   7095     }
   7096     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
   7097     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
   7098     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   7099    label(c, LABEL_LOOPBREAK);
   7100     putop(c, OP_POP);
   7101     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
   7102   } else {
   7103     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7104     putchecktag(c, f, wire_type, LABEL_DISPATCH);
   7105    dispatchtarget(c, method, f, wire_type);
   7106     putpush(c, f);
   7107     putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
   7108     putop(c, OP_CALL, sub_m);
   7109     putop(c, OP_POP);
   7110     maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
   7111     if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
   7112       putop(c, OP_SETDELIM);
   7113     }
   7114   }
   7115 }
   7116 
   7117 /* Generates bytecode to parse a single string or lazy submessage field. */
   7118 static void generate_delimfield(compiler *c, const upb_fielddef *f,
   7119                                 upb_pbdecodermethod *method) {
   7120   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
   7121 
   7122   label(c, LABEL_FIELD);
   7123   if (upb_fielddef_isseq(f)) {
   7124     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7125     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   7126    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
   7127     putop(c, OP_PUSHTAGDELIM, 0);
   7128     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
   7129    label(c, LABEL_LOOPSTART);
   7130     putop(c, OP_PUSHLENDELIM);
   7131     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
   7132     /* Need to emit even if no handler to skip past the string. */
   7133     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
   7134     putop(c, OP_POP);
   7135     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
   7136     putop(c, OP_SETDELIM);
   7137     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
   7138     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
   7139     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   7140    label(c, LABEL_LOOPBREAK);
   7141     putop(c, OP_POP);
   7142     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
   7143   } else {
   7144     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7145     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   7146    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
   7147     putop(c, OP_PUSHLENDELIM);
   7148     putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
   7149     putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
   7150     putop(c, OP_POP);
   7151     maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
   7152     putop(c, OP_SETDELIM);
   7153   }
   7154 }
   7155 
   7156 /* Generates bytecode to parse a single primitive field. */
   7157 static void generate_primitivefield(compiler *c, const upb_fielddef *f,
   7158                                     upb_pbdecodermethod *method) {
   7159   const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
   7160   upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
   7161   opcode parse_type;
   7162   upb_selector_t sel;
   7163   int wire_type;
   7164 
   7165   label(c, LABEL_FIELD);
   7166 
   7167   /* From a decoding perspective, ENUM is the same as INT32. */
   7168   if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
   7169     descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;
   7170 
   7171   parse_type = (opcode)descriptor_type;
   7172 
   7173   /* TODO(haberman): generate packed or non-packed first depending on "packed"
   7174    * setting in the fielddef.  This will favor (in speed) whichever was
   7175    * specified. */
   7176 
   7177   assert((int)parse_type >= 0 && parse_type <= OP_MAX);
   7178   sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
   7179   wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
   7180   if (upb_fielddef_isseq(f)) {
   7181     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7182     putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   7183    dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
   7184     putop(c, OP_PUSHLENDELIM);
   7185     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
   7186    label(c, LABEL_LOOPSTART);
   7187     putop(c, parse_type, sel);
   7188     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
   7189     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   7190    dispatchtarget(c, method, f, wire_type);
   7191     putop(c, OP_PUSHTAGDELIM, 0);
   7192     putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
   7193    label(c, LABEL_LOOPSTART);
   7194     putop(c, parse_type, sel);
   7195     putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
   7196     putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
   7197     putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   7198    label(c, LABEL_LOOPBREAK);
   7199     putop(c, OP_POP);  /* Packed and non-packed join. */
   7200     maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
   7201     putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
   7202   } else {
   7203     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7204     putchecktag(c, f, wire_type, LABEL_DISPATCH);
   7205    dispatchtarget(c, method, f, wire_type);
   7206     putop(c, parse_type, sel);
   7207   }
   7208 }
   7209 
   7210 /* Adds bytecode for parsing the given message to the given decoderplan,
   7211  * while adding all dispatch targets to this message's dispatch table. */
   7212 static void compile_method(compiler *c, upb_pbdecodermethod *method) {
   7213   const upb_handlers *h;
   7214   const upb_msgdef *md;
   7215   uint32_t* start_pc;
   7216   upb_msg_field_iter i;
   7217   upb_value val;
   7218 
   7219   assert(method);
   7220 
   7221   /* Clear all entries in the dispatch table. */
   7222   upb_inttable_uninit(&method->dispatch);
   7223   upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
   7224 
   7225   h = upb_pbdecodermethod_desthandlers(method);
   7226   md = upb_handlers_msgdef(h);
   7227 
   7228  method->code_base.ofs = pcofs(c);
   7229   putop(c, OP_SETDISPATCH, &method->dispatch);
   7230   putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
   7231  label(c, LABEL_FIELD);
   7232   start_pc = c->pc;
   7233   for(upb_msg_field_begin(&i, md);
   7234       !upb_msg_field_done(&i);
   7235       upb_msg_field_next(&i)) {
   7236     const upb_fielddef *f = upb_msg_iter_field(&i);
   7237     upb_fieldtype_t type = upb_fielddef_type(f);
   7238 
   7239     if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
   7240       generate_msgfield(c, f, method);
   7241     } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
   7242                type == UPB_TYPE_MESSAGE) {
   7243       generate_delimfield(c, f, method);
   7244     } else {
   7245       generate_primitivefield(c, f, method);
   7246     }
   7247   }
   7248 
   7249   /* If there were no fields, or if no handlers were defined, we need to
   7250    * generate a non-empty loop body so that we can at least dispatch for unknown
   7251    * fields and check for the end of the message. */
   7252   if (c->pc == start_pc) {
   7253     /* Check for end-of-message. */
   7254     putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
   7255     /* Unconditionally dispatch. */
   7256     putop(c, OP_DISPATCH, 0);
   7257   }
   7258 
   7259   /* For now we just loop back to the last field of the message (or if none,
   7260    * the DISPATCH opcode for the message). */
   7261   putop(c, OP_BRANCH, -LABEL_FIELD);
   7262 
   7263   /* Insert both a label and a dispatch table entry for this end-of-msg. */
   7264  label(c, LABEL_ENDMSG);
   7265   val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
   7266   upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
   7267 
   7268   putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
   7269   putop(c, OP_RET);
   7270 
   7271   upb_inttable_compact(&method->dispatch);
   7272 }
   7273 
   7274 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
   7275  * Returns the method for these handlers.
   7276  *
   7277  * Generates a new method for every destination handlers reachable from "h". */
   7278 static void find_methods(compiler *c, const upb_handlers *h) {
   7279   upb_value v;
   7280   upb_msg_field_iter i;
   7281   const upb_msgdef *md;
   7282 
   7283   if (upb_inttable_lookupptr(&c->group->methods, h, &v))
   7284     return;
   7285   newmethod(h, c->group);
   7286 
   7287   /* Find submethods. */
   7288   md = upb_handlers_msgdef(h);
   7289   for(upb_msg_field_begin(&i, md);
   7290       !upb_msg_field_done(&i);
   7291       upb_msg_field_next(&i)) {
   7292     const upb_fielddef *f = upb_msg_iter_field(&i);
   7293     const upb_handlers *sub_h;
   7294     if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
   7295         (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
   7296       /* We only generate a decoder method for submessages with handlers.
   7297        * Others will be parsed as unknown fields. */
   7298       find_methods(c, sub_h);
   7299     }
   7300   }
   7301 }
   7302 
   7303 /* (Re-)compile bytecode for all messages in "msgs."
   7304  * Overwrites any existing bytecode in "c". */
   7305 static void compile_methods(compiler *c) {
   7306   upb_inttable_iter i;
   7307 
   7308   /* Start over at the beginning of the bytecode. */
   7309   c->pc = c->group->bytecode;
   7310 
   7311   upb_inttable_begin(&i, &c->group->methods);
   7312   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   7313     upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
   7314     compile_method(c, method);
   7315   }
   7316 }
   7317 
   7318 static void set_bytecode_handlers(mgroup *g) {
   7319   upb_inttable_iter i;
   7320   upb_inttable_begin(&i, &g->methods);
   7321   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   7322     upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
   7323     upb_byteshandler *h = &m->input_handler_;
   7324 
   7325     m->code_base.ptr = g->bytecode + m->code_base.ofs;
   7326 
   7327     upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
   7328     upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
   7329     upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
   7330   }
   7331 }
   7332 
   7333 
   7334 /* JIT setup. *****************************************************************/
   7335 
   7336 #ifdef UPB_USE_JIT_X64
   7337 
   7338 static void sethandlers(mgroup *g, bool allowjit) {
   7339   g->jit_code = NULL;
   7340   if (allowjit) {
   7341     /* Compile byte-code into machine code, create handlers. */
   7342     upb_pbdecoder_jit(g);
   7343   } else {
   7344     set_bytecode_handlers(g);
   7345   }
   7346 }
   7347 
   7348 #else  /* UPB_USE_JIT_X64 */
   7349 
   7350 static void sethandlers(mgroup *g, bool allowjit) {
   7351   /* No JIT compiled in; use bytecode handlers unconditionally. */
   7352   UPB_UNUSED(allowjit);
   7353   set_bytecode_handlers(g);
   7354 }
   7355 
   7356 #endif  /* UPB_USE_JIT_X64 */
   7357 
   7358 
   7359 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest
   7360  * handlers and other mgroups (but verify we have a transitive closure). */
   7361 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy,
   7362                          const void *owner) {
   7363   mgroup *g;
   7364   compiler *c;
   7365 
   7366   UPB_UNUSED(allowjit);
   7367   assert(upb_handlers_isfrozen(dest));
   7368 
   7369   g = newgroup(owner);
   7370   c = newcompiler(g, lazy);
   7371   find_methods(c, dest);
   7372 
   7373   /* We compile in two passes:
   7374    * 1. all messages are assigned relative offsets from the beginning of the
   7375    *    bytecode (saved in method->code_base).
   7376    * 2. forwards OP_CALL instructions can be correctly linked since message
   7377    *    offsets have been previously assigned.
   7378    *
   7379    * Could avoid the second pass by linking OP_CALL instructions somehow. */
   7380   compile_methods(c);
   7381   compile_methods(c);
   7382   g->bytecode_end = c->pc;
   7383   freecompiler(c);
   7384 
   7385 #ifdef UPB_DUMP_BYTECODE
   7386   {
   7387     FILE *f = fopen("/tmp/upb-bytecode", "wb");
   7388     assert(f);
   7389     dumpbc(g->bytecode, g->bytecode_end, stderr);
   7390     dumpbc(g->bytecode, g->bytecode_end, f);
   7391     fclose(f);
   7392   }
   7393 #endif
   7394 
   7395   sethandlers(g, allowjit);
   7396   return g;
   7397 }
   7398 
   7399 
   7400 /* upb_pbcodecache ************************************************************/
   7401 
   7402 void upb_pbcodecache_init(upb_pbcodecache *c) {
   7403   upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR);
   7404   c->allow_jit_ = true;
   7405 }
   7406 
   7407 void upb_pbcodecache_uninit(upb_pbcodecache *c) {
   7408   upb_inttable_iter i;
   7409   upb_inttable_begin(&i, &c->groups);
   7410   for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
   7411     const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i));
   7412     mgroup_unref(group, c);
   7413   }
   7414   upb_inttable_uninit(&c->groups);
   7415 }
   7416 
   7417 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) {
   7418   return c->allow_jit_;
   7419 }
   7420 
   7421 bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) {
   7422   if (upb_inttable_count(&c->groups) > 0)
   7423     return false;
   7424   c->allow_jit_ = allow;
   7425   return true;
   7426 }
   7427 
   7428 const upb_pbdecodermethod *upb_pbcodecache_getdecodermethod(
   7429     upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) {
   7430   upb_value v;
   7431   bool ok;
   7432 
   7433   /* Right now we build a new DecoderMethod every time.
   7434    * TODO(haberman): properly cache methods by their true key. */
   7435   const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c);
   7436   upb_inttable_push(&c->groups, upb_value_constptr(g));
   7437 
   7438   ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v);
   7439   UPB_ASSERT_VAR(ok, ok);
   7440   return upb_value_getptr(v);
   7441 }
   7442 
   7443 
   7444 /* upb_pbdecodermethodopts ****************************************************/
   7445 
   7446 void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts,
   7447                                   const upb_handlers *h) {
   7448   opts->handlers = h;
   7449   opts->lazy = false;
   7450 }
   7451 
   7452 void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) {
   7453   opts->lazy = lazy;
   7454 }
/*
** upb::Decoder (Bytecode Decoder VM)
**
** Bytecode must previously have been generated using the bytecode compiler in
** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
** parse the input.
**
** Decoding is fully resumable; we just keep a pointer to the current bytecode
** instruction and resume from there.  A fair amount of the logic here exists
** to handle the fact that values can span buffer seams, so we must be capable
** of suspending/resuming at any byte in the stream.  This sometimes requires
** keeping a few trailing bytes from the last buffer around in the "residual"
** buffer.
*/
   7469 
   7470 #include <inttypes.h>
   7471 #include <stddef.h>
   7472 
   7473 #ifdef UPB_DUMP_BYTECODE
   7474 #include <stdio.h>
   7475 #endif
   7476 
/* Evaluates x; if it is false, returns upb_pbdecoder_suspend(d) from the
 * enclosing function.  Requires a decoder variable named "d" in scope at the
 * point of use.
 * NOTE(review): this expands to a bare "if" statement (no do/while(0)
 * wrapper), so do not use it as the unbraced body of an if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
   7478 
   7479 /* Error messages that are shared between the bytecode and JIT decoders. */
   7480 const char *kPbDecoderStackOverflow = "Nesting too deep.";
   7481 const char *kPbDecoderSubmessageTooLong =
   7482     "Submessage end extends past enclosing submessage.";
   7483 
   7484 /* Error messages shared within this file. */
   7485 static const char *kUnterminatedVarint = "Unterminated varint.";
   7486 
   7487 /* upb_pbdecoder **************************************************************/
   7488 
   7489 static opcode halt = OP_HALT;
   7490 
   7491 /* Whether an op consumes any of the input buffer. */
   7492 static bool consumes_input(opcode op) {
   7493   switch (op) {
   7494     case OP_SETDISPATCH:
   7495     case OP_STARTMSG:
   7496     case OP_ENDMSG:
   7497     case OP_STARTSEQ:
   7498     case OP_ENDSEQ:
   7499     case OP_STARTSUBMSG:
   7500     case OP_ENDSUBMSG:
   7501     case OP_STARTSTR:
   7502     case OP_ENDSTR:
   7503     case OP_PUSHTAGDELIM:
   7504     case OP_POP:
   7505     case OP_SETDELIM:
   7506     case OP_SETBIGGROUPNUM:
   7507     case OP_CHECKDELIM:
   7508     case OP_CALL:
   7509     case OP_RET:
   7510     case OP_BRANCH:
   7511       return false;
   7512     default:
   7513       return true;
   7514   }
   7515 }
   7516 
   7517 static size_t stacksize(upb_pbdecoder *d, size_t entries) {
   7518   UPB_UNUSED(d);
   7519   return entries * sizeof(upb_pbdecoder_frame);
   7520 }
   7521 
   7522 static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
   7523   UPB_UNUSED(d);
   7524 
   7525 #ifdef UPB_USE_JIT_X64
   7526   if (d->method_->is_native_) {
   7527     /* Each native stack frame needs two pointers, plus we need a few frames for
   7528      * the enter/exit trampolines. */
   7529     size_t ret = entries * sizeof(void*) * 2;
   7530     ret += sizeof(void*) * 10;
   7531     return ret;
   7532   }
   7533 #endif
   7534 
   7535   return entries * sizeof(uint32_t*);
   7536 }
   7537 
   7538 
   7539 static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
   7540 
   7541 /* It's unfortunate that we have to micro-manage the compiler with
   7542  * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
   7543  * specific to one hardware configuration.  But empirically on a Core i7,
   7544  * performance increases 30-50% with these annotations.  Every instance where
   7545  * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
   7546  * benchmarks. */
   7547 
   7548 static void seterr(upb_pbdecoder *d, const char *msg) {
   7549   upb_status status = UPB_STATUS_INIT;
   7550   upb_status_seterrmsg(&status, msg);
   7551   upb_env_reporterror(d->env, &status);
   7552 }
   7553 
   7554 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
   7555   seterr(d, msg);
   7556 }
   7557 
   7558 
   7559 /* Buffering ******************************************************************/
   7560 
   7561 /* We operate on one buffer at a time, which is either the user's buffer passed
   7562  * to our "decode" callback or some residual bytes from the previous buffer. */
   7563 
   7564 /* How many bytes can be safely read from d->ptr without reading past end-of-buf
   7565  * or past the current delimited end. */
   7566 static size_t curbufleft(const upb_pbdecoder *d) {
   7567   assert(d->data_end >= d->ptr);
   7568   return d->data_end - d->ptr;
   7569 }
   7570 
   7571 /* How many bytes are available before end-of-buffer. */
   7572 static size_t bufleft(const upb_pbdecoder *d) {
   7573   return d->end - d->ptr;
   7574 }
   7575 
   7576 /* Overall stream offset of d->ptr. */
   7577 uint64_t offset(const upb_pbdecoder *d) {
   7578   return d->bufstart_ofs + (d->ptr - d->buf);
   7579 }
   7580 
   7581 /* How many bytes are available before the end of this delimited region. */
   7582 size_t delim_remaining(const upb_pbdecoder *d) {
   7583   return d->top->end_ofs - offset(d);
   7584 }
   7585 
   7586 /* Advances d->ptr. */
   7587 static void advance(upb_pbdecoder *d, size_t len) {
   7588   assert(curbufleft(d) >= len);
   7589   d->ptr += len;
   7590 }
   7591 
   7592 static bool in_buf(const char *p, const char *buf, const char *end) {
   7593   return p >= buf && p <= end;
   7594 }
   7595 
   7596 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
   7597   return in_buf(p, d->residual, d->residual_end);
   7598 }
   7599 
   7600 /* Calculates the delim_end value, which is affected by both the current buffer
   7601  * and the parsing stack, so must be called whenever either is updated. */
   7602 static void set_delim_end(upb_pbdecoder *d) {
   7603   size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
   7604   if (delim_ofs <= (size_t)(d->end - d->buf)) {
   7605     d->delim_end = d->buf + delim_ofs;
   7606     d->data_end = d->delim_end;
   7607   } else {
   7608     d->data_end = d->end;
   7609     d->delim_end = NULL;
   7610   }
   7611 }
   7612 
   7613 static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
   7614   d->ptr = buf;
   7615   d->buf = buf;
   7616   d->end = end;
   7617   set_delim_end(d);
   7618 }
   7619 
   7620 static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
   7621   assert(curbufleft(d) == 0);
   7622   d->bufstart_ofs += (d->end - d->buf);
   7623   switchtobuf(d, buf, buf + len);
   7624 }
   7625 
   7626 static void checkpoint(upb_pbdecoder *d) {
   7627   /* The assertion here is in the interests of efficiency, not correctness.
   7628    * We are trying to ensure that we don't checkpoint() more often than
   7629    * necessary. */
   7630   assert(d->checkpoint != d->ptr);
   7631   d->checkpoint = d->ptr;
   7632 }
   7633 
/* Skips "bytes" bytes in the stream, which may be more than available.  If we
 * skip more bytes than are available, we return a long read count to the caller
 * indicating how many bytes can be skipped over before passing actual data
 * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
 * won't actually be read.
 *
 * Returns:
 *   - the result of upb_pbdecoder_suspend() after reporting an error, if the
 *     skip would run past the enclosing delimited region;
 *   - DECODE_OK if the skip ends strictly before the end of the current
 *     buffer;
 *   - otherwise d->size_param + d->skip, where d->skip is the number of bytes
 *     still to be skipped beyond what the current buffer provided.
 */
static int32_t skip(upb_pbdecoder *d, size_t bytes) {
  /* Preconditions: we are not in the middle of residual bytes while user data
   * is pending, and no earlier skip is still outstanding. */
  assert(!in_residual_buf(d, d->ptr) || d->size_param == 0);
  assert(d->skip == 0);
  if (bytes > delim_remaining(d)) {
    seterr(d, "Skipped value extended beyond enclosing submessage.");
    return upb_pbdecoder_suspend(d);
  } else if (bufleft(d) > bytes) {
    /* Skipped data is all in current buffer, and more is still available. */
    advance(d, bytes);
    d->skip = 0;
    return DECODE_OK;
  } else {
    /* Skipped data extends beyond currently available buffers. */
    /* Restart from the current instruction when we are resumed. */
    d->pc = d->last;
    /* Remember how much is left to skip once this buffer is used up. */
    d->skip = bytes - curbufleft(d);
    /* Account for the whole current buffer, then switch to an empty residual
     * buffer so that no stale bytes are carried over. */
    d->bufstart_ofs += (d->end - d->buf);
    d->residual_end = d->residual;
    switchtobuf(d, d->residual, d->residual_end);
    return d->size_param + d->skip;
  }
}
   7661 
   7662 
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 *   p:      unused here.
 *   buf:    the caller's buffer; may be NULL only if the whole span is
 *           covered by a pending skip.
 *   size:   number of bytes in buf.
 *   handle: buffer handle, stored in d->handle.
 *
 * Returns DECODE_OK when decoding can proceed.  Otherwise returns the result
 * of upb_pbdecoder_suspend() (after reporting an error for a NULL buffer), or
 * whatever CHECK_RETURN propagates from skip() / upb_pbdecoder_skipunknown().
 * NOTE(review): CHECK_RETURN is defined outside this section; it presumably
 * returns early on any non-negative status -- confirm. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* Remember the caller's buffer so slow paths can reach it later. */
  d->buf_param = buf;
  d->size_param = size;
  d->handle = handle;

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    assert(d->ptr == d->residual);
  } else {
    /* No residual bytes: read directly from the caller's buffer. */
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  if (d->skip) {
    /* Finish a skip left over from a previous call.  d->skip is cleared
     * first because skip() asserts that it is zero on entry. */
    size_t skip_bytes = d->skip;
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    d->checkpoint = d->ptr;
  }

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return upb_pbdecoder_suspend(d);
  }

  if (d->top->groupnum < 0) {
    /* A negative groupnum marks an unknown group that was being skipped
     * (see upb_pbdecoder_skipunknown()); keep skipping it. */
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    d->checkpoint = d->ptr;
  }

  return DECODE_OK;
}
   7702 
   7703 /* Suspends the decoder at the last checkpoint, without saving any residual
   7704  * bytes.  If there are any unconsumed bytes, returns a short byte count. */
   7705 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
   7706   d->pc = d->last;
   7707   if (d->checkpoint == d->residual) {
   7708     /* Checkpoint was in residual buf; no user bytes were consumed. */
   7709     d->ptr = d->residual;
   7710     return 0;
   7711   } else {
   7712     size_t consumed;
   7713     assert(!in_residual_buf(d, d->checkpoint));
   7714     assert(d->buf == d->buf_param);
   7715 
   7716     consumed = d->checkpoint - d->buf;
   7717     d->bufstart_ofs += consumed;
   7718     d->residual_end = d->residual;
   7719     switchtobuf(d, d->residual, d->residual_end);
   7720     return consumed;
   7721   }
   7722 }
   7723 
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param, i.e. the full size of the user's buffer. */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    /* The combined bytes must fit in the fixed-size residual array. */
    assert((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* We had already moved on to the user's buffer; move the buffer-start
       * offset back so it again refers to the start of the residual bytes. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    assert(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and keep everything from there onward. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    assert(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    /* The residual buffer now starts at the checkpoint's stream offset. */
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
   7758 
   7759 /* Copies the next "bytes" bytes into "buf" and advances the stream.
   7760  * Requires that this many bytes are available in the current buffer. */
   7761 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
   7762                                          size_t bytes) {
   7763   assert(bytes <= curbufleft(d));
   7764   memcpy(buf, d->ptr, bytes);
   7765   advance(d, bytes);
   7766 }
   7767 
/* Slow path for getting the next "bytes" bytes, regardless of whether they are
 * available in the current buffer or not.  Returns a status code as described
 * in decoder.int.h.
 *
 * Called by getbytes() when the current buffer holds fewer than "bytes"
 * bytes.  Returns DECODE_OK once all bytes were copied into "buf"; otherwise
 * returns the result of upb_pbdecoder_suspend() (delimited region ended
 * mid-value, reported as an error) or of suspend_save() (more input
 * needed). */
UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
                                          size_t bytes) {
  /* Take whatever the current buffer still has. */
  const size_t avail = curbufleft(d);
  consumebytes(d, buf, avail);
  bytes -= avail;
  assert(bytes > 0);
  if (in_residual_buf(d, d->ptr)) {
    /* Residual bytes are used up; continue with the user's buffer. */
    advancetobuf(d, d->buf_param, d->size_param);
  }
  if (curbufleft(d) >= bytes) {
    consumebytes(d, (char *)buf + avail, bytes);
    return DECODE_OK;
  } else if (d->data_end == d->delim_end) {
    /* The shortfall is caused by the delimited end, not by end-of-buffer. */
    seterr(d, "Submessage ended in the middle of a value or group");
    return upb_pbdecoder_suspend(d);
  } else {
    /* Ran out of buffer: stash the partial value and wait for more input. */
    return suspend_save(d);
  }
}
   7790 
   7791 /* Gets the next "bytes" bytes, regardless of whether they are available in the
   7792  * current buffer or not.  Returns a status code as described in decoder.int.h.
   7793  */
   7794 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
   7795                                         size_t bytes) {
   7796   if (curbufleft(d) >= bytes) {
   7797     /* Buffer has enough data to satisfy. */
   7798     consumebytes(d, buf, bytes);
   7799     return DECODE_OK;
   7800   } else {
   7801     return getbytes_slow(d, buf, bytes);
   7802   }
   7803 }
   7804 
   7805 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
   7806                                           size_t bytes) {
   7807   size_t ret = curbufleft(d);
   7808   memcpy(buf, d->ptr, ret);
   7809   if (in_residual_buf(d, d->ptr)) {
   7810     size_t copy = UPB_MIN(bytes - ret, d->size_param);
   7811     memcpy((char *)buf + ret, d->buf_param, copy);
   7812     ret += copy;
   7813   }
   7814   return ret;
   7815 }
   7816 
   7817 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
   7818                                         size_t bytes) {
   7819   if (curbufleft(d) >= bytes) {
   7820     memcpy(buf, d->ptr, bytes);
   7821     return bytes;
   7822   } else {
   7823     return peekbytes_slow(d, buf, bytes);
   7824   }
   7825 }
   7826 
   7827 
   7828 /* Decoding of wire types *****************************************************/
   7829 
   7830 /* Slow path for decoding a varint from the current buffer position.
   7831  * Returns a status code as described in decoder.int.h. */
   7832 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
   7833                                                       uint64_t *u64) {
   7834   uint8_t byte = 0x80;
   7835   int bitpos;
   7836   *u64 = 0;
   7837   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
   7838     int32_t ret = getbytes(d, &byte, 1);
   7839     if (ret >= 0) return ret;
   7840     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
   7841   }
   7842   if(bitpos == 70 && (byte & 0x80)) {
   7843     seterr(d, kUnterminatedVarint);
   7844     return upb_pbdecoder_suspend(d);
   7845   }
   7846   return DECODE_OK;
   7847 }
   7848 
   7849 /* Decodes a varint from the current buffer position.
   7850  * Returns a status code as described in decoder.int.h. */
   7851 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
   7852   if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
   7853     *u64 = *d->ptr;
   7854     advance(d, 1);
   7855     return DECODE_OK;
   7856   } else if (curbufleft(d) >= 10) {
   7857     /* Fast case. */
   7858     upb_decoderet r = upb_vdecode_fast(d->ptr);
   7859     if (r.p == NULL) {
   7860       seterr(d, kUnterminatedVarint);
   7861       return upb_pbdecoder_suspend(d);
   7862     }
   7863     advance(d, r.p - d->ptr);
   7864     *u64 = r.val;
   7865     return DECODE_OK;
   7866   } else {
   7867     /* Slow case -- varint spans buffer seam. */
   7868     return upb_pbdecoder_decode_varint_slow(d, u64);
   7869   }
   7870 }
   7871 
   7872 /* Decodes a 32-bit varint from the current buffer position.
   7873  * Returns a status code as described in decoder.int.h. */
   7874 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
   7875   uint64_t u64;
   7876   int32_t ret = decode_varint(d, &u64);
   7877   if (ret >= 0) return ret;
   7878   if (u64 > UINT32_MAX) {
   7879     seterr(d, "Unterminated 32-bit varint");
   7880     /* TODO(haberman) guarantee that this function return is >= 0 somehow,
   7881      * so we know this path will always be treated as error by our caller.
   7882      * Right now the size_t -> int32_t can overflow and produce negative values.
   7883      */
   7884     *u32 = 0;
   7885     return upb_pbdecoder_suspend(d);
   7886   }
   7887   *u32 = u64;
   7888   return DECODE_OK;
   7889 }
   7890 
   7891 /* Decodes a fixed32 from the current buffer position.
   7892  * Returns a status code as described in decoder.int.h.
   7893  * TODO: proper byte swapping for big-endian machines. */
   7894 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
   7895   return getbytes(d, u32, 4);
   7896 }
   7897 
   7898 /* Decodes a fixed64 from the current buffer position.
   7899  * Returns a status code as described in decoder.int.h.
   7900  * TODO: proper byte swapping for big-endian machines. */
   7901 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
   7902   return getbytes(d, u64, 8);
   7903 }
   7904 
   7905 /* Non-static versions of the above functions.
   7906  * These are called by the JIT for fallback paths. */
   7907 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
   7908   return decode_fixed32(d, u32);
   7909 }
   7910 
   7911 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
   7912   return decode_fixed64(d, u64);
   7913 }
   7914 
   7915 static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; }
   7916 static float  as_float(uint32_t n)  { float  f; memcpy(&f, &n, 4); return f; }
   7917 
   7918 /* Pushes a frame onto the decoder stack. */
   7919 static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
   7920   upb_pbdecoder_frame *fr = d->top;
   7921 
   7922   if (end > fr->end_ofs) {
   7923     seterr(d, kPbDecoderSubmessageTooLong);
   7924     return false;
   7925   } else if (fr == d->limit) {
   7926     seterr(d, kPbDecoderStackOverflow);
   7927     return false;
   7928   }
   7929 
   7930   fr++;
   7931   fr->end_ofs = end;
   7932   fr->dispatch = NULL;
   7933   fr->groupnum = 0;
   7934   d->top = fr;
   7935   return true;
   7936 }
   7937 
   7938 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
   7939   /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
   7940    * field number) prior to hitting any enclosing submessage end, pushing our
   7941    * existing delim end prevents us from continuing to parse values from a
   7942    * corrupt proto that doesn't give us an END tag in time. */
   7943   if (!decoder_push(d, d->top->end_ofs))
   7944     return false;
   7945   d->top->groupnum = arg;
   7946   return true;
   7947 }
   7948 
   7949 /* Pops a frame from the decoder stack. */
   7950 static void decoder_pop(upb_pbdecoder *d) { d->top--; }
   7951 
   7952 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
   7953                                                  uint64_t expected) {
   7954   uint64_t data = 0;
   7955   size_t bytes = upb_value_size(expected);
   7956   size_t read = peekbytes(d, &data, bytes);
   7957   if (read == bytes && data == expected) {
   7958     /* Advance past matched bytes. */
   7959     int32_t ok = getbytes(d, &data, read);
   7960     UPB_ASSERT_VAR(ok, ok < 0);
   7961     return DECODE_OK;
   7962   } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
   7963     return suspend_save(d);
   7964   } else {
   7965     return DECODE_MISMATCH;
   7966   }
   7967 }
   7968 
/* Skips over one unknown field, or over a whole unknown group.
 *
 *   fieldnum:  field number of an already-decoded tag, or a negative value
 *              if the tag still has to be read from the input.
 *   wire_type: wire type of the already-decoded tag (ignored when fieldnum
 *              is negative).
 *
 * While skipping an unknown group, the frames pushed here carry a negative
 * group number, and the loop keeps consuming fields until the top frame's
 * group number is non-negative again.
 *
 * Returns DECODE_OK once the field (or group) has been skipped,
 * DECODE_ENDGROUP if an END_GROUP tag matching the top frame's (positive)
 * group number was seen, or a suspend result after an error or when more
 * input is needed.
 * NOTE(review): CHECK_RETURN is defined outside this section; it presumably
 * returns early on any non-negative status -- confirm. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* The caller already decoded a tag: jump past the tag read below. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    /* Low three bits are the wire type; the rest is the field number. */
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return upb_pbdecoder_suspend(d);
    }

    /* TODO: deliver to unknown field callback. */
    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* The value is decoded only to find its end; it is discarded. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Unknown groups are tracked with a negated field number. */
        CHECK_SUSPEND(pushtagdelim(d, -fieldnum));
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* Closes the unknown group we are currently skipping. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* Closes a known group; let the caller handle it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return upb_pbdecoder_suspend(d);
    }

    /* A non-negative group number means we are no longer inside an unknown
     * group, so the skip is complete. */
    if (d->top->groupnum >= 0) {
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
   8031 
   8032 static void goto_endmsg(upb_pbdecoder *d) {
   8033   upb_value v;
   8034   bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
   8035   UPB_ASSERT_VAR(found, found);
   8036   d->pc = d->top->base + upb_value_getuint64(v);
   8037 }
   8038 
/* Parses a tag and jumps to the corresponding bytecode instruction for this
 * field.
 *
 * If the tag is unknown (or the wire type doesn't match), parses the field as
 * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
 * instruction for the end of message.
 *
 * Returns DECODE_OK when d->pc has been set up for the next instruction, or
 * a status propagated by CHECK_RETURN from tag decoding / unknown-field
 * skipping.
 * NOTE(review): CHECK_RETURN is defined outside this section; it presumably
 * returns early on any non-negative status -- confirm. */
static int32_t dispatch(upb_pbdecoder *d) {
  upb_inttable *dispatch = d->top->dispatch;
  uint32_t tag;
  uint8_t wire_type;
  uint32_t fieldnum;
  upb_value val;
  int32_t retval;

  /* Decode tag. */
  CHECK_RETURN(decode_v32(d, &tag));
  wire_type = tag & 0x7;
  fieldnum = tag >> 3;

  /* Lookup tag.  Because of packed/non-packed compatibility, we have to
   * check the wire type against two possibilities. */
  if (fieldnum != DISPATCH_ENDMSG &&
      upb_inttable_lookup32(dispatch, fieldnum, &val)) {
    /* Table value layout as used below: bits 0-7 hold the first accepted
     * wire type, bits 8-15 the second, and bits 16 and up the bytecode offset
     * for the first. */
    uint64_t v = upb_value_getuint64(val);
    if (wire_type == (v & 0xff)) {
      d->pc = d->top->base + (v >> 16);
      return DECODE_OK;
    } else if (wire_type == ((v >> 8) & 0xff)) {
      /* The offset for the second wire type lives in a separate entry keyed
       * by fieldnum + UPB_MAX_FIELDNUMBER. */
      bool found =
          upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
      UPB_ASSERT_VAR(found, found);
      d->pc = d->top->base + upb_value_getuint64(val);
      return DECODE_OK;
    }
  }

  /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
   * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
   * we need to back up to, so that when we're done skipping unknown data we
   * can re-check the delimited end. */
  d->last--;  /* Necessary if we get suspended */
  d->pc = d->last;
  assert(getop(*d->last) == OP_CHECKDELIM);

  /* Unknown field or ENDGROUP. */
  retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);

  CHECK_RETURN(retval);

  if (retval == DECODE_ENDGROUP) {
    /* The group we were parsing has ended; run its end-of-message code. */
    goto_endmsg(d);
    return DECODE_OK;
  }

  return DECODE_OK;
}
   8095 
   8096 /* Callers know that the stack is more than one deep because the opcodes that
   8097  * call this only occur after PUSH operations. */
   8098 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
   8099   assert(d->top != d->stack);
   8100   return d->top - 1;
   8101 }
   8102 
   8103 
   8104 /* The main decoding loop *****************************************************/
   8105 
   8106 /* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
   8107  * switch() statement. */
   8108 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
   8109                       const upb_bufhandle* handle) {
   8110 
   8111 #define VMCASE(op, code) \
   8112   case op: { code; if (consumes_input(op)) checkpoint(d); break; }
   8113 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
   8114   VMCASE(OP_PARSE_ ## type, { \
   8115     ctype val; \
   8116     CHECK_RETURN(decode_ ## wt(d, &val)); \
   8117     upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \
   8118   })
   8119 
   8120   while(1) {
   8121     int32_t instruction;
   8122     opcode op;
   8123     uint32_t arg;
   8124     int32_t longofs;
   8125 
   8126     d->last = d->pc;
   8127     instruction = *d->pc++;
   8128     op = getop(instruction);
   8129     arg = instruction >> 8;
   8130     longofs = arg;
   8131     assert(d->ptr != d->residual_end);
   8132     UPB_UNUSED(group);
   8133 #ifdef UPB_DUMP_BYTECODE
   8134     fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
   8135                     "%x %s (%d)\n",
   8136             (int)offset(d),
   8137             (int)(d->ptr - d->buf),
   8138             (int)(d->data_end - d->ptr),
   8139             (int)(d->end - d->ptr),
   8140             (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
   8141             (int)(d->pc - 1 - group->bytecode),
   8142             upb_pbdecoder_getopname(op),
   8143             arg);
   8144 #endif
   8145     switch (op) {
   8146       /* Technically, we are losing data if we see a 32-bit varint that is not
   8147        * properly sign-extended.  We could detect this and error about the data
   8148        * loss, but proto2 does not do this, so we pass. */
   8149       PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
   8150       PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
   8151       PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
   8152       PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
   8153       PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
   8154       PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
   8155       PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
   8156       PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
   8157       PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
   8158       PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
   8159       PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
   8160       PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
   8161       PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)
   8162 
   8163       VMCASE(OP_SETDISPATCH,
   8164         d->top->base = d->pc - 1;
   8165         memcpy(&d->top->dispatch, d->pc, sizeof(void*));
   8166         d->pc += sizeof(void*) / sizeof(uint32_t);
   8167       )
   8168       VMCASE(OP_STARTMSG,
   8169         CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink));
   8170       )
   8171       VMCASE(OP_ENDMSG,
   8172         CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status));
   8173       )
   8174       VMCASE(OP_STARTSEQ,
   8175         upb_pbdecoder_frame *outer = outer_frame(d);
   8176         CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink));
   8177       )
   8178       VMCASE(OP_ENDSEQ,
   8179         CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg));
   8180       )
   8181       VMCASE(OP_STARTSUBMSG,
   8182         upb_pbdecoder_frame *outer = outer_frame(d);
   8183         CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink));
   8184       )
   8185       VMCASE(OP_ENDSUBMSG,
   8186         CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg));
   8187       )
   8188       VMCASE(OP_STARTSTR,
   8189         uint32_t len = delim_remaining(d);
   8190         upb_pbdecoder_frame *outer = outer_frame(d);
   8191         CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink));
   8192         if (len == 0) {
   8193           d->pc++;  /* Skip OP_STRING. */
   8194         }
   8195       )
   8196       VMCASE(OP_STRING,
   8197         uint32_t len = curbufleft(d);
   8198         size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle);
   8199         if (n > len) {
   8200           if (n > delim_remaining(d)) {
   8201             seterr(d, "Tried to skip past end of string.");
   8202             return upb_pbdecoder_suspend(d);
   8203           } else {
   8204             int32_t ret = skip(d, n);
   8205             /* This shouldn't return DECODE_OK, because n > len. */
   8206             assert(ret >= 0);
   8207             return ret;
   8208           }
   8209         }
   8210         advance(d, n);
   8211         if (n < len || d->delim_end == NULL) {
   8212           /* We aren't finished with this string yet. */
   8213           d->pc--;  /* Repeat OP_STRING. */
   8214           if (n > 0) checkpoint(d);
   8215           return upb_pbdecoder_suspend(d);
   8216         }
   8217       )
   8218       VMCASE(OP_ENDSTR,
   8219         CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg));
   8220       )
   8221       VMCASE(OP_PUSHTAGDELIM,
   8222         CHECK_SUSPEND(pushtagdelim(d, arg));
   8223       )
   8224       VMCASE(OP_SETBIGGROUPNUM,
   8225         d->top->groupnum = *d->pc++;
   8226       )
   8227       VMCASE(OP_POP,
   8228         assert(d->top > d->stack);
   8229         decoder_pop(d);
   8230       )
   8231       VMCASE(OP_PUSHLENDELIM,
   8232         uint32_t len;
   8233         CHECK_RETURN(decode_v32(d, &len));
   8234         CHECK_SUSPEND(decoder_push(d, offset(d) + len));
   8235         set_delim_end(d);
   8236       )
   8237       VMCASE(OP_SETDELIM,
   8238         set_delim_end(d);
   8239       )
   8240       VMCASE(OP_CHECKDELIM,
   8241         /* We are guaranteed of this assert because we never allow ourselves to
   8242          * consume bytes beyond data_end, which covers delim_end when non-NULL.
   8243          */
   8244         assert(!(d->delim_end && d->ptr > d->delim_end));
   8245         if (d->ptr == d->delim_end)
   8246           d->pc += longofs;
   8247       )
   8248       VMCASE(OP_CALL,
   8249         d->callstack[d->call_len++] = d->pc;
   8250         d->pc += longofs;
   8251       )
   8252       VMCASE(OP_RET,
   8253         assert(d->call_len > 0);
   8254         d->pc = d->callstack[--d->call_len];
   8255       )
   8256       VMCASE(OP_BRANCH,
   8257         d->pc += longofs;
   8258       )
   8259       VMCASE(OP_TAG1,
   8260         uint8_t expected;
   8261         CHECK_SUSPEND(curbufleft(d) > 0);
   8262         expected = (arg >> 8) & 0xff;
   8263         if (*d->ptr == expected) {
   8264           advance(d, 1);
   8265         } else {
   8266           int8_t shortofs;
   8267          badtag:
   8268           shortofs = arg;
   8269           if (shortofs == LABEL_DISPATCH) {
   8270             CHECK_RETURN(dispatch(d));
   8271           } else {
   8272             d->pc += shortofs;
   8273             break; /* Avoid checkpoint(). */
   8274           }
   8275         }
   8276       )
   8277       VMCASE(OP_TAG2,
   8278         uint16_t expected;
   8279         CHECK_SUSPEND(curbufleft(d) > 0);
   8280         expected = (arg >> 8) & 0xffff;
   8281         if (curbufleft(d) >= 2) {
   8282           uint16_t actual;
   8283           memcpy(&actual, d->ptr, 2);
   8284           if (expected == actual) {
   8285             advance(d, 2);
   8286           } else {
   8287             goto badtag;
   8288           }
   8289         } else {
   8290           int32_t result = upb_pbdecoder_checktag_slow(d, expected);
   8291           if (result == DECODE_MISMATCH) goto badtag;
   8292           if (result >= 0) return result;
   8293         }
   8294       )
   8295       VMCASE(OP_TAGN, {
   8296         uint64_t expected;
   8297         int32_t result;
   8298         memcpy(&expected, d->pc, 8);
   8299         d->pc += 2;
   8300         result = upb_pbdecoder_checktag_slow(d, expected);
   8301         if (result == DECODE_MISMATCH) goto badtag;
   8302         if (result >= 0) return result;
   8303       })
   8304       VMCASE(OP_DISPATCH, {
   8305         CHECK_RETURN(dispatch(d));
   8306       })
   8307       VMCASE(OP_HALT, {
   8308         return d->size_param;
   8309       })
   8310     }
   8311   }
   8312 }
   8313 
   8314 
   8315 /* BytesHandler handlers ******************************************************/
   8316 
   8317 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
   8318   upb_pbdecoder *d = closure;
   8319   UPB_UNUSED(size_hint);
   8320   d->top->end_ofs = UINT64_MAX;
   8321   d->bufstart_ofs = 0;
   8322   d->call_len = 1;
   8323   d->callstack[0] = &halt;
   8324   d->pc = pc;
   8325   d->skip = 0;
   8326   return d;
   8327 }
   8328 
   8329 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) {
   8330   upb_pbdecoder *d = closure;
   8331   UPB_UNUSED(hd);
   8332   UPB_UNUSED(size_hint);
   8333   d->top->end_ofs = UINT64_MAX;
   8334   d->bufstart_ofs = 0;
   8335   d->call_len = 0;
   8336   d->skip = 0;
   8337   return d;
   8338 }
   8339 
/* End-of-stream handler.  Verifies that the input did not stop in the middle
 * of something (buffered residual bytes, a pending skip, or a frame with a
 * known end offset), marks the current offset as the end of the message, and
 * then runs the decoder once more on an empty buffer so it can process the
 * end-of-message condition.  Returns false, after recording an error via
 * seterr(), if the stream ended at an invalid position. */
bool upb_pbdecoder_end(void *closure, const void *handler_data) {
  upb_pbdecoder *d = closure;
  const upb_pbdecodermethod *method = handler_data;
  uint64_t end;
  char dummy;  /* Only its address is used, as a zero-length input buffer. */

  if (d->residual_end > d->residual) {
    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
    return false;
  }

  if (d->skip) {
    seterr(d, "Unexpected EOF inside skipped data");
    return false;
  }

  /* UINT64_MAX is the "no end known" value the start handlers assign. */
  if (d->top->end_ofs != UINT64_MAX) {
    seterr(d, "Unexpected EOF inside delimited string");
    return false;
  }

  /* The user's end() call indicates that the message ends here. */
  end = offset(d);
  d->top->end_ofs = end;

#ifdef UPB_USE_JIT_X64
  if (method->is_native_) {
    const mgroup *group = (const mgroup*)method->group;
    if (d->top != d->stack)
      d->stack->end_ofs = 0;
    group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL);
  } else
#endif
  {
    /* Bytecode path.  NOTE: this brace block is the "else" branch of the
     * conditional above when UPB_USE_JIT_X64 is defined, and an unconditional
     * block otherwise. */
    const uint32_t *p = d->pc;
    d->stack->end_ofs = end;
    /* Check the previous bytecode, but guard against beginning. */
    if (p != method->code_base.ptr) p--;
    if (getop(*p) == OP_CHECKDELIM) {
      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
      assert(getop(*d->pc) == OP_TAG1 ||
             getop(*d->pc) == OP_TAG2 ||
             getop(*d->pc) == OP_TAGN ||
             getop(*d->pc) == OP_DISPATCH);
      d->pc = p;
    }
    /* Resume on an empty buffer so the end offset set above is observed. */
    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
  }

  /* A non-empty callstack after the final run means we stopped mid-message. */
  if (d->call_len != 0) {
    seterr(d, "Unexpected EOF inside submessage or group");
    return false;
  }

  return true;
}
   8396 
   8397 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
   8398                             size_t size, const upb_bufhandle *handle) {
   8399   int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
   8400 
   8401   if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
   8402   CHECK_RETURN(result);
   8403 
   8404   return run_decoder_vm(decoder, group, handle);
   8405 }
   8406 
   8407 
   8408 /* Public API *****************************************************************/
   8409 
   8410 void upb_pbdecoder_reset(upb_pbdecoder *d) {
   8411   d->top = d->stack;
   8412   d->top->groupnum = 0;
   8413   d->ptr = d->residual;
   8414   d->buf = d->residual;
   8415   d->end = d->residual;
   8416   d->residual_end = d->residual;
   8417 }
   8418 
   8419 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m,
   8420                                     upb_sink *sink) {
   8421   const size_t default_max_nesting = 64;
   8422 #ifndef NDEBUG
   8423   size_t size_before = upb_env_bytesallocated(e);
   8424 #endif
   8425 
   8426   upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder));
   8427   if (!d) return NULL;
   8428 
   8429   d->method_ = m;
   8430   d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting));
   8431   d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting));
   8432   if (!d->stack || !d->callstack) {
   8433     return NULL;
   8434   }
   8435 
   8436   d->env = e;
   8437   d->limit = d->stack + default_max_nesting - 1;
   8438   d->stack_size = default_max_nesting;
   8439 
   8440   upb_pbdecoder_reset(d);
   8441   upb_bytessink_reset(&d->input_, &m->input_handler_, d);
   8442 
   8443   assert(sink);
   8444   if (d->method_->dest_handlers_) {
   8445     if (sink->handlers != d->method_->dest_handlers_)
   8446       return NULL;
   8447   }
   8448   upb_sink_reset(&d->top->sink, sink->handlers, sink->closure);
   8449 
   8450   /* If this fails, increase the value in decoder.h. */
   8451   assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE);
   8452   return d;
   8453 }
   8454 
   8455 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
   8456   return offset(d);
   8457 }
   8458 
   8459 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
   8460   return d->method_;
   8461 }
   8462 
   8463 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) {
   8464   return &d->input_;
   8465 }
   8466 
   8467 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
   8468   return d->stack_size;
   8469 }
   8470 
   8471 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
   8472   assert(d->top >= d->stack);
   8473 
   8474   if (max < (size_t)(d->top - d->stack)) {
   8475     /* Can't set a limit smaller than what we are currently at. */
   8476     return false;
   8477   }
   8478 
   8479   if (max > d->stack_size) {
   8480     /* Need to reallocate stack and callstack to accommodate. */
   8481     size_t old_size = stacksize(d, d->stack_size);
   8482     size_t new_size = stacksize(d, max);
   8483     void *p = upb_env_realloc(d->env, d->stack, old_size, new_size);
   8484     if (!p) {
   8485       return false;
   8486     }
   8487     d->stack = p;
   8488 
   8489     old_size = callstacksize(d, d->stack_size);
   8490     new_size = callstacksize(d, max);
   8491     p = upb_env_realloc(d->env, d->callstack, old_size, new_size);
   8492     if (!p) {
   8493       return false;
   8494     }
   8495     d->callstack = p;
   8496 
   8497     d->stack_size = max;
   8498   }
   8499 
   8500   d->limit = d->stack + max - 1;
   8501   return true;
   8502 }
   8503 /*
   8504 ** upb::Encoder
   8505 **
   8506 ** Since we are implementing pure handlers (ie. without any out-of-band access
   8507 ** to pre-computed lengths), we have to buffer all submessages before we can
   8508 ** emit even their first byte.
   8509 **
   8510 ** Not knowing the size of submessages also means we can't write a perfect
   8511 ** zero-copy implementation, even with buffering.  Lengths are stored as
   8512 ** varints, which means that we don't know how many bytes to reserve for the
   8513 ** length until we know what the length is.
   8514 **
   8515 ** This leaves us with three main choices:
   8516 **
   8517 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly
   8518 **    once into the output buffer.
   8519 **
   8520 ** 2. attempt to buffer data directly into the output buffer, estimating how
   8521 **    many bytes each length will take.  When our guesses are wrong, use
   8522 **    memmove() to grow or shrink the allotted space.
   8523 **
   8524 ** 3. buffer directly into the output buffer, allocating a max length
   8525 **    ahead-of-time for each submessage length.  If we overallocated, we waste
   8526 **    space, but no memcpy() or memmove() is required.  This approach requires
   8527 **    defining a maximum size for submessages and rejecting submessages that
   8528 **    exceed that size.
   8529 **
   8530 ** (2) and (3) have the potential to have better performance, but they are more
   8531 ** complicated and subtle to implement:
   8532 **
   8533 **   (3) requires making an arbitrary choice of the maximum message size; it
   8534 **       wastes space when submessages are shorter than this and fails
   8535 **       completely when they are longer.  This makes it more finicky and
   8536 **       requires configuration based on the input.  It also makes it impossible
   8537 **       to perfectly match the output of reference encoders that always use the
   8538 **       optimal amount of space for each length.
   8539 **
**   (2) requires guessing the size upfront, and if multiple lengths are
   8541 **       guessed wrong the minimum required number of memmove() operations may
   8542 **       be complicated to compute correctly.  Implemented properly, it may have
   8543 **       a useful amortized or average cost, but more investigation is required
   8544 **       to determine this and what the optimal algorithm is to achieve it.
   8545 **
   8546 **   (1) makes you always pay for exactly one copy, but its implementation is
   8547 **       the simplest and its performance is predictable.
   8548 **
   8549 ** So for now, we implement (1) only.  If we wish to optimize later, we should
   8550 ** be able to do it without affecting users.
   8551 **
   8552 ** The strategy is to buffer the segments of data that do *not* depend on
   8553 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers
   8554 ** and lengths.  When the top-level submessage ends, we can go beginning to end,
   8555 ** alternating the writing of lengths with memcpy() of the rest of the data.
   8556 ** At the top level though, no buffering is required.
   8557 */
   8558 
   8559 
   8560 #include <stdlib.h>
   8561 
   8562 /* The output buffer is divided into segments; a segment is a string of data
   8563  * that is "ready to go" -- it does not need any varint lengths inserted into
   8564  * the middle.  The seams between segments are where varints will be inserted
   8565  * once they are known.
   8566  *
   8567  * We also use the concept of a "run", which is a range of encoded bytes that
   8568  * occur at a single submessage level.  Every segment contains one or more runs.
   8569  *
   8570  * A segment can span messages.  Consider:
   8571  *
   8572  *                  .--Submessage lengths---------.
   8573  *                  |       |                     |
   8574  *                  |       V                     V
   8575  *                  V      | |---------------    | |-----------------
   8576  * Submessages:    | |-----------------------------------------------
   8577  * Top-level msg: ------------------------------------------------------------
   8578  *
   8579  * Segments:          -----   -------------------   -----------------
   8580  * Runs:              *----   *--------------*---   *----------------
   8581  * (* marks the start)
   8582  *
 * Note that the top-level message is not in any segment because it does not
   8584  * have any length preceding it.
   8585  *
   8586  * A segment is only interrupted when another length needs to be inserted.  So
   8587  * observe how the second segment spans both the inner submessage and part of
   8588  * the next enclosing message. */
/* Bookkeeping for one segment of buffered output.  When the buffered data is
 * finally emitted, each segment is written as the varint encoding of "msglen"
 * followed by "seglen" bytes of buffered data. */
typedef struct {
  uint32_t msglen;  /* The length to varint-encode before this segment. */
  uint32_t seglen;  /* Length of the segment. */
} upb_pb_encoder_segment;
   8593 
/* State of a protobuf binary encoder. */
struct upb_pb_encoder {
  /* Environment that all of the encoder's buffers are allocated from. */
  upb_env *env;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink *output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf" is
   * allocated from "env" when the encoder is created and is reallocated when
   * more space is needed. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry in the stack is the index
   * into "segbuf" of the segment where this submessage's length is being
   * accumulated.  "top" is NULL while we are not inside any delimited
   * region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
   8624 
   8625 /* low-level buffering ********************************************************/
   8626 
   8627 /* Low-level functions for interacting with the output buffer. */
   8628 
   8629 /* TODO(haberman): handle pushback */
   8630 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
   8631   size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
   8632   UPB_ASSERT_VAR(n, n == len);
   8633 }
   8634 
   8635 static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
   8636   return &e->segbuf[*e->top];
   8637 }
   8638 
   8639 /* Call to ensure that at least "bytes" bytes are available for writing at
   8640  * e->ptr.  Returns false if the bytes could not be allocated. */
   8641 static bool reserve(upb_pb_encoder *e, size_t bytes) {
   8642   if ((size_t)(e->limit - e->ptr) < bytes) {
   8643     /* Grow buffer. */
   8644     char *new_buf;
   8645     size_t needed = bytes + (e->ptr - e->buf);
   8646     size_t old_size = e->limit - e->buf;
   8647 
   8648     size_t new_size = old_size;
   8649 
   8650     while (new_size < needed) {
   8651       new_size *= 2;
   8652     }
   8653 
   8654     new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size);
   8655 
   8656     if (new_buf == NULL) {
   8657       return false;
   8658     }
   8659 
   8660     e->ptr = new_buf + (e->ptr - e->buf);
   8661     e->runbegin = new_buf + (e->runbegin - e->buf);
   8662     e->limit = new_buf + new_size;
   8663     e->buf = new_buf;
   8664   }
   8665 
   8666   return true;
   8667 }
   8668 
   8669 /* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
   8670  * previously called reserve() with at least this many bytes. */
   8671 static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
   8672   assert((size_t)(e->limit - e->ptr) >= bytes);
   8673   e->ptr += bytes;
   8674 }
   8675 
   8676 /* Call when all of the bytes for a handler have been written.  Flushes the
   8677  * bytes if possible and necessary, returning false if this failed. */
   8678 static bool commit(upb_pb_encoder *e) {
   8679   if (!e->top) {
   8680     /* We aren't inside a delimited region.  Flush our accumulated bytes to
   8681      * the output.
   8682      *
   8683      * TODO(haberman): in the future we may want to delay flushing for
   8684      * efficiency reasons. */
   8685     putbuf(e, e->buf, e->ptr - e->buf);
   8686     e->ptr = e->buf;
   8687   }
   8688 
   8689   return true;
   8690 }
   8691 
   8692 /* Writes the given bytes to the buffer, handling reserve/advance. */
   8693 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
   8694   if (!reserve(e, len)) {
   8695     return false;
   8696   }
   8697 
   8698   memcpy(e->ptr, data, len);
   8699   encoder_advance(e, len);
   8700   return true;
   8701 }
   8702 
   8703 /* Finish the current run by adding the run totals to the segment and message
   8704  * length. */
   8705 static void accumulate(upb_pb_encoder *e) {
   8706   size_t run_len;
   8707   assert(e->ptr >= e->runbegin);
   8708   run_len = e->ptr - e->runbegin;
   8709   e->segptr->seglen += run_len;
   8710   top(e)->msglen += run_len;
   8711   e->runbegin = e->ptr;
   8712 }
   8713 
   8714 /* Call to indicate the start of delimited region for which the full length is
   8715  * not yet known.  All data will be buffered until the length is known.
   8716  * Delimited regions may be nested; their lengths will all be tracked properly. */
   8717 static bool start_delim(upb_pb_encoder *e) {
   8718   if (e->top) {
   8719     /* We are already buffering, advance to the next segment and push it on the
   8720      * stack. */
   8721     accumulate(e);
   8722 
   8723     if (++e->top == e->stacklimit) {
   8724       /* TODO(haberman): grow stack? */
   8725       return false;
   8726     }
   8727 
   8728     if (++e->segptr == e->seglimit) {
   8729       /* Grow segment buffer. */
   8730       size_t old_size =
   8731           (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
   8732       size_t new_size = old_size * 2;
   8733       upb_pb_encoder_segment *new_buf =
   8734           upb_env_realloc(e->env, e->segbuf, old_size, new_size);
   8735 
   8736       if (new_buf == NULL) {
   8737         return false;
   8738       }
   8739 
   8740       e->segptr = new_buf + (e->segptr - e->segbuf);
   8741       e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
   8742       e->segbuf = new_buf;
   8743     }
   8744   } else {
   8745     /* We were previously at the top level, start buffering. */
   8746     e->segptr = e->segbuf;
   8747     e->top = e->stack;
   8748     e->runbegin = e->ptr;
   8749   }
   8750 
   8751   *e->top = e->segptr - e->segbuf;
   8752   e->segptr->seglen = 0;
   8753   e->segptr->msglen = 0;
   8754 
   8755   return true;
   8756 }
   8757 
   8758 /* Call to indicate the end of a delimited region.  We now know the length of
   8759  * the delimited region.  If we are not nested inside any other delimited
   8760  * regions, we can now emit all of the buffered data we accumulated. */
   8761 static bool end_delim(upb_pb_encoder *e) {
   8762   size_t msglen;
   8763   accumulate(e);
   8764   msglen = top(e)->msglen;
   8765 
   8766   if (e->top == e->stack) {
   8767     /* All lengths are now available, emit all buffered data. */
   8768     char buf[UPB_PB_VARINT_MAX_LEN];
   8769     upb_pb_encoder_segment *s;
   8770     const char *ptr = e->buf;
   8771     for (s = e->segbuf; s <= e->segptr; s++) {
   8772       size_t lenbytes = upb_vencode64(s->msglen, buf);
   8773       putbuf(e, buf, lenbytes);
   8774       putbuf(e, ptr, s->seglen);
   8775       ptr += s->seglen;
   8776     }
   8777 
   8778     e->ptr = e->buf;
   8779     e->top = NULL;
   8780   } else {
   8781     /* Need to keep buffering; propagate length info into enclosing
   8782      * submessages. */
   8783     --e->top;
   8784     top(e)->msglen += msglen + upb_varint_size(msglen);
   8785   }
   8786 
   8787   return true;
   8788 }
   8789 
   8790 
   8791 /* tag_t **********************************************************************/
   8792 
/* A precomputed (pre-encoded) tag and length. */

typedef struct {
  uint8_t bytes;  /* Number of valid bytes in "tag". */
  char tag[7];    /* The varint-encoded tag. */
} tag_t;
   8799 
   8800 /* Allocates a new tag for this field, and sets it in these handlerattr. */
   8801 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
   8802                     upb_handlerattr *attr) {
   8803   uint32_t n = upb_fielddef_number(f);
   8804 
   8805   tag_t *tag = malloc(sizeof(tag_t));
   8806   tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
   8807 
   8808   upb_handlerattr_init(attr);
   8809   upb_handlerattr_sethandlerdata(attr, tag);
   8810   upb_handlers_addcleanup(h, tag, free);
   8811 }
   8812 
   8813 static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
   8814   return encode_bytes(e, tag->tag, tag->bytes);
   8815 }
   8816 
   8817 
   8818 /* encoding of wire types *****************************************************/
   8819 
   8820 static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
   8821   /* TODO(haberman): byte-swap for big endian. */
   8822   return encode_bytes(e, &val, sizeof(uint64_t));
   8823 }
   8824 
   8825 static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
   8826   /* TODO(haberman): byte-swap for big endian. */
   8827   return encode_bytes(e, &val, sizeof(uint32_t));
   8828 }
   8829 
   8830 static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
   8831   if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
   8832     return false;
   8833   }
   8834 
   8835   encoder_advance(e, upb_vencode64(val, e->ptr));
   8836   return true;
   8837 }
   8838 
   8839 static uint64_t dbl2uint64(double d) {
   8840   uint64_t ret;
   8841   memcpy(&ret, &d, sizeof(uint64_t));
   8842   return ret;
   8843 }
   8844 
   8845 static uint32_t flt2uint32(float d) {
   8846   uint32_t ret;
   8847   memcpy(&ret, &d, sizeof(uint32_t));
   8848   return ret;
   8849 }
   8850 
   8851 
   8852 /* encoding of proto types ****************************************************/
   8853 
   8854 static bool startmsg(void *c, const void *hd) {
   8855   upb_pb_encoder *e = c;
   8856   UPB_UNUSED(hd);
   8857   if (e->depth++ == 0) {
   8858     upb_bytessink_start(e->output_, 0, &e->subc);
   8859   }
   8860   return true;
   8861 }
   8862 
   8863 static bool endmsg(void *c, const void *hd, upb_status *status) {
   8864   upb_pb_encoder *e = c;
   8865   UPB_UNUSED(hd);
   8866   UPB_UNUSED(status);
   8867   if (--e->depth == 0) {
   8868     upb_bytessink_end(e->output_);
   8869   }
   8870   return true;
   8871 }
   8872 
   8873 static void *encode_startdelimfield(void *c, const void *hd) {
   8874   bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
   8875   return ok ? c : UPB_BREAK;
   8876 }
   8877 
   8878 static bool encode_enddelimfield(void *c, const void *hd) {
   8879   UPB_UNUSED(hd);
   8880   return end_delim(c);
   8881 }
   8882 
   8883 static void *encode_startgroup(void *c, const void *hd) {
   8884   return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
   8885 }
   8886 
   8887 static bool encode_endgroup(void *c, const void *hd) {
   8888   return encode_tag(c, hd) && commit(c);
   8889 }
   8890 
   8891 static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
   8892   UPB_UNUSED(size_hint);
   8893   return encode_startdelimfield(c, hd);
   8894 }
   8895 
   8896 static size_t encode_strbuf(void *c, const void *hd, const char *buf,
   8897                             size_t len, const upb_bufhandle *h) {
   8898   UPB_UNUSED(hd);
   8899   UPB_UNUSED(h);
   8900   return encode_bytes(c, buf, len) ? len : 0;
   8901 }
   8902 
   8903 #define T(type, ctype, convert, encode)                                  \
   8904   static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
   8905     return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
   8906   }                                                                      \
   8907   static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
   8908     UPB_UNUSED(hd);                                                      \
   8909     return encode(e, (convert)(val));                                    \
   8910   }
   8911 
   8912 T(double,   double,   dbl2uint64,   encode_fixed64)
   8913 T(float,    float,    flt2uint32,   encode_fixed32)
   8914 T(int64,    int64_t,  uint64_t,     encode_varint)
   8915 T(int32,    int32_t,  uint32_t,     encode_varint)
   8916 T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
   8917 T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
   8918 T(bool,     bool,     bool,         encode_varint)
   8919 T(uint32,   uint32_t, uint32_t,     encode_varint)
   8920 T(uint64,   uint64_t, uint64_t,     encode_varint)
   8921 T(enum,     int32_t,  uint32_t,     encode_varint)
   8922 T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
   8923 T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
   8924 T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
   8925 T(sint64,   int64_t,  upb_zzenc_64, encode_varint)
   8926 
   8927 #undef T
   8928 
   8929 
   8930 /* code to build the handlers *************************************************/
   8931 
/* Callback that populates "h" with the encoder's handlers: the start/end
 * message handlers plus, for every field of the message, handlers chosen by
 * the field's descriptor type.  Each field's handlers receive a pre-encoded
 * tag as their handler data. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    /* Only repeated primitive fields that are marked packed are written in
     * packed form. */
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr;
    /* A packed field is written as a single length-delimited blob; any other
     * field uses the native wire type of its descriptor type. */
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      /* The sequence handlers emit the tag and delimit the packed data. */
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* T(upper, lower, upbtype) expands to the switch case for descriptor type
 * "upper": it registers encode_packed_<lower> or encode_scalar_<lower> through
 * upb_handlers_set<upbtype>(). */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE,   double,   double);
      T(FLOAT,    float,    float);
      T(INT64,    int64,    int64);
      T(INT32,    int32,    int32);
      T(FIXED64,  fixed64,  uint64);
      T(FIXED32,  fixed32,  uint32);
      T(BOOL,     bool,     bool);
      T(UINT32,   uint32,   uint32);
      T(UINT64,   uint64,   uint64);
      T(ENUM,     enum,     int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32,   sint32,   int32);
      T(SINT64,   sint64,   int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        upb_handlerattr_uninit(&attr2);
        break;
      }
    }

#undef T

    upb_handlerattr_uninit(&attr);
  }
}
   9013 
   9014 void upb_pb_encoder_reset(upb_pb_encoder *e) {
   9015   e->segptr = NULL;
   9016   e->top = NULL;
   9017   e->depth = 0;
   9018 }
   9019 
   9020 
   9021 /* public API *****************************************************************/
   9022 
   9023 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m,
   9024                                                const void *owner) {
   9025   return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL);
   9026 }
   9027 
   9028 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h,
   9029                                       upb_bytessink *output) {
   9030   const size_t initial_bufsize = 256;
   9031   const size_t initial_segbufsize = 16;
   9032   /* TODO(haberman): make this configurable. */
   9033   const size_t stack_size = 64;
   9034 #ifndef NDEBUG
   9035   const size_t size_before = upb_env_bytesallocated(env);
   9036 #endif
   9037 
   9038   upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder));
   9039   if (!e) return NULL;
   9040 
   9041   e->buf = upb_env_malloc(env, initial_bufsize);
   9042   e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf));
   9043   e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack));
   9044 
   9045   if (!e->buf || !e->segbuf || !e->stack) {
   9046     return NULL;
   9047   }
   9048 
   9049   e->limit = e->buf + initial_bufsize;
   9050   e->seglimit = e->segbuf + initial_segbufsize;
   9051   e->stacklimit = e->stack + stack_size;
   9052 
   9053   upb_pb_encoder_reset(e);
   9054   upb_sink_reset(&e->input_, h, e);
   9055 
   9056   e->env = env;
   9057   e->output_ = output;
   9058   e->subc = output->closure;
   9059   e->ptr = e->buf;
   9060 
   9061   /* If this fails, increase the value in encoder.h. */
   9062   assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE);
   9063   return e;
   9064 }
   9065 
   9066 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; }
   9067 
   9068 
   9069 #include <stdio.h>
   9070 #include <stdlib.h>
   9071 #include <string.h>
   9072 
   9073 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n,
   9074                                         void *owner, upb_status *status) {
   9075   /* Create handlers. */
   9076   const upb_pbdecodermethod *decoder_m;
   9077   const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h);
   9078   upb_env env;
   9079   upb_pbdecodermethodopts opts;
   9080   upb_pbdecoder *decoder;
   9081   upb_descreader *reader;
   9082   bool ok;
   9083   upb_def **ret = NULL;
   9084   upb_def **defs;
   9085 
   9086   upb_pbdecodermethodopts_init(&opts, reader_h);
   9087   decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m);
   9088 
   9089   upb_env_init(&env);
   9090   upb_env_reporterrorsto(&env, status);
   9091 
   9092   reader = upb_descreader_create(&env, reader_h);
   9093   decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader));
   9094 
   9095   /* Push input data. */
   9096   ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder));
   9097 
   9098   if (!ok) goto cleanup;
   9099   defs = upb_descreader_getdefs(reader, owner, n);
   9100   ret = malloc(sizeof(upb_def*) * (*n));
   9101   memcpy(ret, defs, sizeof(upb_def*) * (*n));
   9102 
   9103 cleanup:
   9104   upb_env_uninit(&env);
   9105   upb_handlers_unref(reader_h, &reader_h);
   9106   upb_pbdecodermethod_unref(decoder_m, &decoder_m);
   9107   return ret;
   9108 }
   9109 
   9110 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len,
   9111                                      upb_status *status) {
   9112   int n;
   9113   bool success;
   9114   upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status);
   9115   if (!defs) return false;
   9116   success = upb_symtab_add(s, defs, n, &defs, status);
   9117   free(defs);
   9118   return success;
   9119 }
   9120 
   9121 char *upb_readfile(const char *filename, size_t *len) {
   9122   long size;
   9123   char *buf;
   9124   FILE *f = fopen(filename, "rb");
   9125   if(!f) return NULL;
   9126   if(fseek(f, 0, SEEK_END) != 0) goto error;
   9127   size = ftell(f);
   9128   if(size < 0) goto error;
   9129   if(fseek(f, 0, SEEK_SET) != 0) goto error;
   9130   buf = malloc(size + 1);
   9131   if(size && fread(buf, size, 1, f) != 1) goto error;
   9132   fclose(f);
   9133   if (len) *len = size;
   9134   return buf;
   9135 
   9136 error:
   9137   fclose(f);
   9138   return NULL;
   9139 }
   9140 
   9141 bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname,
   9142                                           upb_status *status) {
   9143   size_t len;
   9144   bool success;
   9145   char *data = upb_readfile(fname, &len);
   9146   if (!data) {
   9147     if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname);
   9148     return false;
   9149   }
   9150   success = upb_load_descriptor_into_symtab(symtab, data, len, status);
   9151   free(data);
   9152   return success;
   9153 }
   9154 /*
   9155  * upb::pb::TextPrinter
   9156  *
   9157  * OPT: This is not optimized at all.  It uses printf() which parses the format
   9158  * string every time, and it allocates memory for every put.
   9159  */
   9160 
   9161 
   9162 #include <ctype.h>
   9163 #include <float.h>
   9164 #include <inttypes.h>
   9165 #include <stdarg.h>
   9166 #include <stdio.h>
   9167 #include <stdlib.h>
   9168 #include <string.h>
   9169 
   9170 
   9171 struct upb_textprinter {
          /* State of one text printer. */

          /* Sink bound to this printer; returned by upb_textprinter_input(). */
   9172   upb_sink input_;
          /* Byte sink that receives the generated text. */
   9173   upb_bytessink *output_;
          /* Current submessage nesting level; indent() writes two spaces per
           * level. */
   9174   int indent_depth_;
          /* When true, indent() writes nothing and endfield() writes ' '
           * instead of '\n'. */
   9175   bool single_line_;
          /* Closure filled in by upb_bytessink_start() and passed to every
           * upb_bytessink_putbuf() call. */
   9176   void *subc;
   9177 };
   9178 
   9179 #define CHECK(x) if ((x) < 0) goto err;  /* Jumps to the enclosing function's "err" label when x is negative. */
   9180 
   9181 static const char *shortname(const char *longname) {
   9182   const char *last = strrchr(longname, '.');
   9183   return last ? last + 1 : longname;
   9184 }
   9185 
   9186 static int indent(upb_textprinter *p) {
   9187   int i;
   9188   if (!p->single_line_)
   9189     for (i = 0; i < p->indent_depth_; i++)
   9190       upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
   9191   return 0;
   9192 }
   9193 
   9194 static int endfield(upb_textprinter *p) {
   9195   const char ch = (p->single_line_ ? ' ' : '\n');
   9196   upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
   9197   return 0;
   9198 }
   9199 
   9200 static int putescaped(upb_textprinter *p, const char *buf, size_t len,
   9201                       bool preserve_utf8) {
   9202   /* Based on CEscapeInternal() from Google's protobuf release. */
   9203   char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
   9204   const char *end = buf + len;
   9205 
   9206   /* I think hex is prettier and more useful, but proto2 uses octal; should
   9207    * investigate whether it can parse hex also. */
   9208   const bool use_hex = false;
   9209   bool last_hex_escape = false; /* true if last output char was \xNN */
   9210 
   9211   for (; buf < end; buf++) {
   9212     bool is_hex_escape;
   9213 
   9214     if (dstend - dst < 4) {
   9215       upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
   9216       dst = dstbuf;
   9217     }
   9218 
   9219     is_hex_escape = false;
   9220     switch (*buf) {
   9221       case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
   9222       case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
   9223       case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
   9224       case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
   9225       case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
   9226       case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
   9227       default:
   9228         /* Note that if we emit \xNN and the buf character after that is a hex
   9229          * digit then that digit must be escaped too to prevent it being
   9230          * interpreted as part of the character code by C. */
   9231         if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
   9232             (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
   9233           sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
   9234           is_hex_escape = use_hex;
   9235           dst += 4;
   9236         } else {
   9237           *(dst++) = *buf; break;
   9238         }
   9239     }
   9240     last_hex_escape = is_hex_escape;
   9241   }
   9242   /* Flush remaining data. */
   9243   upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
   9244   return 0;
   9245 }
   9246 
   9247 bool putf(upb_textprinter *p, const char *fmt, ...) {
   9248   va_list args;
   9249   va_list args_copy;
   9250   char *str;
   9251   int written;
   9252   int len;
   9253   bool ok;
   9254 
   9255   va_start(args, fmt);
   9256 
   9257   /* Run once to get the length of the string. */
   9258   _upb_va_copy(args_copy, args);
   9259   len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
   9260   va_end(args_copy);
   9261 
   9262   /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
   9263   str = malloc(len + 1);
   9264   if (!str) return false;
   9265   written = vsprintf(str, fmt, args);
   9266   va_end(args);
   9267   UPB_ASSERT_VAR(written, written == len);
   9268 
   9269   ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
   9270   free(str);
   9271   return ok;
   9272 }
   9273 
   9274 
   9275 /* handlers *******************************************************************/
   9276 
   9277 static bool textprinter_startmsg(void *c, const void *hd) {
   9278   upb_textprinter *p = c;
   9279   UPB_UNUSED(hd);
   9280   if (p->indent_depth_ == 0) {
   9281     upb_bytessink_start(p->output_, 0, &p->subc);
   9282   }
   9283   return true;
   9284 }
   9285 
   9286 static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
   9287   upb_textprinter *p = c;
   9288   UPB_UNUSED(hd);
   9289   UPB_UNUSED(s);
   9290   if (p->indent_depth_ == 0) {
   9291     upb_bytessink_end(p->output_);
   9292   }
   9293   return true;
   9294 }
   9295 
   9296 #define TYPE(name, ctype, fmt) \
   9297   static bool textprinter_put ## name(void *closure, const void *handler_data, \
   9298                                       ctype val) {                             \
   9299     upb_textprinter *p = closure;                                              \
   9300     const upb_fielddef *f = handler_data;                                      \
   9301     CHECK(indent(p));                                                          \
   9302     putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
   9303     CHECK(endfield(p));                                                        \
   9304     return true;                                                               \
   9305   err:                                                                         \
   9306     return false;                                                              \
   9307 }
        /* TYPE(name, ctype, fmt) defines textprinter_put<name>(), a value handler
         * that writes the indentation, then "<field name>: <value>" with the value
         * formatted by the printf conversion |fmt|, then the field terminator.
         * The handler data is the field's upb_fielddef.  The return value of
         * putf() is not checked. */
   9308 
   9309 static bool textprinter_putbool(void *closure, const void *handler_data,
   9310                                 bool val) {
   9311   upb_textprinter *p = closure;
   9312   const upb_fielddef *f = handler_data;
   9313   CHECK(indent(p));
   9314   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
   9315   CHECK(endfield(p));
   9316   return true;
   9317 err:
   9318   return false;
   9319 }
   9320 
   9321 #define STRINGIFY_HELPER(x) #x
   9322 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
        /* The two-level expansion lets a macro argument such as FLT_DIG be replaced
         * by its value before it is turned into a string literal, so the float and
         * double handlers below use FLT_DIG / DBL_DIG as the "%g" precision. */
   9323
        /* Instantiate one value handler per numeric type. */
   9324 TYPE(int32,  int32_t,  "%" PRId32)
   9325 TYPE(int64,  int64_t,  "%" PRId64)
   9326 TYPE(uint32, uint32_t, "%" PRIu32)
   9327 TYPE(uint64, uint64_t, "%" PRIu64)
   9328 TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
   9329 TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
   9330
   9331 #undef TYPE
   9332 
   9333 /* Output a symbolic value from the enum if found, else just print as int32. */
   9334 static bool textprinter_putenum(void *closure, const void *handler_data,
   9335                                 int32_t val) {
   9336   upb_textprinter *p = closure;
   9337   const upb_fielddef *f = handler_data;
   9338   const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f));
   9339   const char *label = upb_enumdef_iton(enum_def, val);
   9340   if (label) {
   9341     indent(p);
   9342     putf(p, "%s: %s", upb_fielddef_name(f), label);
   9343     endfield(p);
   9344   } else {
   9345     if (!textprinter_putint32(closure, handler_data, val))
   9346       return false;
   9347   }
   9348   return true;
   9349 }
   9350 
   9351 static void *textprinter_startstr(void *closure, const void *handler_data,
   9352                       size_t size_hint) {
   9353   upb_textprinter *p = closure;
   9354   const upb_fielddef *f = handler_data;
   9355   UPB_UNUSED(size_hint);
   9356   indent(p);
   9357   putf(p, "%s: \"", upb_fielddef_name(f));
   9358   return p;
   9359 }
   9360 
   9361 static bool textprinter_endstr(void *closure, const void *handler_data) {
   9362   upb_textprinter *p = closure;
   9363   UPB_UNUSED(handler_data);
   9364   putf(p, "\"");
   9365   endfield(p);
   9366   return true;
   9367 }
   9368 
   9369 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
   9370                                  size_t len, const upb_bufhandle *handle) {
   9371   upb_textprinter *p = closure;
   9372   const upb_fielddef *f = hd;
   9373   UPB_UNUSED(handle);
   9374   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
   9375   return len;
   9376 err:
   9377   return 0;
   9378 }
   9379 
   9380 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
   9381   upb_textprinter *p = closure;
   9382   const char *name = handler_data;
   9383   CHECK(indent(p));
   9384   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
   9385   p->indent_depth_++;
   9386   return p;
   9387 err:
   9388   return UPB_BREAK;
   9389 }
   9390 
   9391 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
   9392   upb_textprinter *p = closure;
   9393   UPB_UNUSED(handler_data);
   9394   p->indent_depth_--;
   9395   CHECK(indent(p));
   9396   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
   9397   CHECK(endfield(p));
   9398   return true;
   9399 err:
   9400   return false;
   9401 }
   9402 
   9403 static void onmreg(const void *c, upb_handlers *h) {
   9404   const upb_msgdef *m = upb_handlers_msgdef(h);
   9405   upb_msg_field_iter i;
   9406   UPB_UNUSED(c);
   9407 
   9408   upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
   9409   upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
   9410 
   9411   for(upb_msg_field_begin(&i, m);
   9412       !upb_msg_field_done(&i);
   9413       upb_msg_field_next(&i)) {
   9414     upb_fielddef *f = upb_msg_iter_field(&i);
   9415     upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER;
   9416     upb_handlerattr_sethandlerdata(&attr, f);
   9417     switch (upb_fielddef_type(f)) {
   9418       case UPB_TYPE_INT32:
   9419         upb_handlers_setint32(h, f, textprinter_putint32, &attr);
   9420         break;
   9421       case UPB_TYPE_INT64:
   9422         upb_handlers_setint64(h, f, textprinter_putint64, &attr);
   9423         break;
   9424       case UPB_TYPE_UINT32:
   9425         upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
   9426         break;
   9427       case UPB_TYPE_UINT64:
   9428         upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
   9429         break;
   9430       case UPB_TYPE_FLOAT:
   9431         upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
   9432         break;
   9433       case UPB_TYPE_DOUBLE:
   9434         upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
   9435         break;
   9436       case UPB_TYPE_BOOL:
   9437         upb_handlers_setbool(h, f, textprinter_putbool, &attr);
   9438         break;
   9439       case UPB_TYPE_STRING:
   9440       case UPB_TYPE_BYTES:
   9441         upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
   9442         upb_handlers_setstring(h, f, textprinter_putstr, &attr);
   9443         upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
   9444         break;
   9445       case UPB_TYPE_MESSAGE: {
   9446         const char *name =
   9447             upb_fielddef_istagdelim(f)
   9448                 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
   9449                 : upb_fielddef_name(f);
   9450         upb_handlerattr_sethandlerdata(&attr, name);
   9451         upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
   9452         upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
   9453         break;
   9454       }
   9455       case UPB_TYPE_ENUM:
   9456         upb_handlers_setint32(h, f, textprinter_putenum, &attr);
   9457         break;
   9458     }
   9459   }
   9460 }
   9461 
   9462 static void textprinter_reset(upb_textprinter *p, bool single_line) {
   9463   p->single_line_ = single_line;
   9464   p->indent_depth_ = 0;
   9465 }
   9466 
   9467 
   9468 /* Public API *****************************************************************/
   9469 
   9470 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h,
   9471                                         upb_bytessink *output) {
   9472   upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter));
   9473   if (!p) return NULL;
   9474 
   9475   p->output_ = output;
   9476   upb_sink_reset(&p->input_, h, p);
   9477   textprinter_reset(p, false);
   9478 
   9479   return p;
   9480 }
   9481 
   9482 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m,
   9483                                                 const void *owner) {
   9484   return upb_handlers_newfrozen(m, owner, &onmreg, NULL);
   9485 }
   9486 
   9487 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; }
   9488 
   9489 void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
   9490   p->single_line_ = single_line;
   9491 }
   9492 
   9493 
   9494 /* Index is descriptor type. */
        /* Each entry is the wire type associated with that descriptor type.  Slot 0
         * is labelled ENDGROUP; presumably no descriptor type has the value 0 and
         * the slot is a placeholder -- confirm against the descriptor type enum. */
   9495 const uint8_t upb_pb_native_wire_types[] = {
   9496   UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
   9497   UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
   9498   UPB_WIRE_TYPE_32BIT,         /* FLOAT */
   9499   UPB_WIRE_TYPE_VARINT,        /* INT64 */
   9500   UPB_WIRE_TYPE_VARINT,        /* UINT64 */
   9501   UPB_WIRE_TYPE_VARINT,        /* INT32 */
   9502   UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
   9503   UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
   9504   UPB_WIRE_TYPE_VARINT,        /* BOOL */
   9505   UPB_WIRE_TYPE_DELIMITED,     /* STRING */
   9506   UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
   9507   UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
   9508   UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
   9509   UPB_WIRE_TYPE_VARINT,        /* UINT32 */
   9510   UPB_WIRE_TYPE_VARINT,        /* ENUM */
   9511   UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
   9512   UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
   9513   UPB_WIRE_TYPE_VARINT,        /* SINT32 */
   9514   UPB_WIRE_TYPE_VARINT,        /* SINT64 */
   9515 };
   9516 
   9517 /* A basic branch-based decoder, uses 32-bit values to get good performance
   9518  * on 32-bit architectures (but performs well on 64-bits also).
   9519  * This scheme comes from the original Google Protobuf implementation
   9520  * (proto2). */
        /* Reads up to 8 varint bytes starting at r.p.  The first byte read lands at
         * bit 14, so r.val presumably already holds the bits of two earlier bytes
         * decoded by the caller -- confirm at the call site.  Bits 0-31 accumulate
         * in |low| and bits 32-63 in |high|.
         *
         * Returns the combined value and the pointer just past the last byte
         * consumed, or {NULL, 0} if the 8th byte still has its continuation bit
         * set. */
   9521 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
   9522   upb_decoderet err = {NULL, 0};
   9523   const char *p = r.p;
   9524   uint32_t low = (uint32_t)r.val;
   9525   uint32_t high = 0;
   9526   uint32_t b;
   9527   b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
   9528   b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
          /* This byte straddles the 32-bit boundary: its low 4 payload bits go to
           * the top of |low|, its upper 3 payload bits start |high|. */
   9529   b = *(p++); low  |= (b & 0x7fU) << 28;
   9530               high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
   9531   b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
   9532   b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
   9533   b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
   9534   b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
          /* Only the lowest payload bit of the last byte fits (bit 63 overall). */
   9535   b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
   9536   return err;
   9537
   9538 done:
   9539   r.val = ((uint64_t)high << 32) | low;
   9540   r.p = p;
   9541   return r;
   9542 }
   9543 
   9544 /* Like the previous, but uses 64-bit values. */
        /* Reads up to 8 varint bytes starting at r.p and ORs each byte's 7 payload
         * bits into r.val at shifts 14, 21, ..., 63.  Stops at the first byte without
         * the continuation bit (0x80) and returns the value together with the
         * pointer just past that byte; returns {NULL, 0} if all 8 bytes have the
         * continuation bit set. */
   9545 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
   9546   const char *p = r.p;
   9547   uint64_t val = r.val;
   9548   uint64_t b;
   9549   upb_decoderet err = {NULL, 0};
   9550   b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
   9551   b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
   9552   b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
   9553   b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
   9554   b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
   9555   b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
   9556   b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
          /* Only the lowest payload bit of the last byte survives this shift. */
   9557   b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
   9558   return err;
   9559
   9560 done:
   9561   r.val = val;
   9562   r.p = p;
   9563   return r;
   9564 }
   9565 
   9566 /* Given an encoded varint v, returns an integer with a single bit set that
   9567  * indicates the end of the varint.  Subtracting one from this value will
   9568  * yield a mask that leaves only bits that are part of the varint.  Returns
   9569  * 0 if the varint is unterminated. */
   9570 static uint64_t upb_get_vstopbit(uint64_t v) {
   9571   uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL;
   9572   return ~cbits & (cbits+1);
   9573 }
   9574 
   9575 /* A branchless decoder.  Credit to Pascal Massimino for the bit-twiddling. */
   9576 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) {
   9577   uint64_t b;
   9578   uint64_t stop_bit;
   9579   upb_decoderet my_r;
   9580   memcpy(&b, r.p, sizeof(b));
   9581   stop_bit = upb_get_vstopbit(b);
   9582   b =  (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1);
   9583   b +=       b & 0x007f007f007f007fULL;
   9584   b +=  3 * (b & 0x0000ffff0000ffffULL);
   9585   b += 15 * (b & 0x00000000ffffffffULL);
   9586   if (stop_bit == 0) {
   9587     /* Error: unterminated varint. */
   9588     upb_decoderet err_r = {(void*)0, 0};
   9589     return err_r;
   9590   }
   9591   my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
   9592                             r.val | (b << 7));
   9593   return my_r;
   9594 }
   9595 
   9596 /* A branchless decoder.  Credit to Daniel Wright for the bit-twiddling. */
   9597 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) {
   9598   uint64_t b;
   9599   uint64_t stop_bit;
   9600   upb_decoderet my_r;
   9601   memcpy(&b, r.p, sizeof(b));
   9602   stop_bit = upb_get_vstopbit(b);
   9603   b &= (stop_bit - 1);
   9604   b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL);
   9605   b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL);
   9606   b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL);
   9607   if (stop_bit == 0) {
   9608     /* Error: unterminated varint. */
   9609     upb_decoderet err_r = {(void*)0, 0};
   9610     return err_r;
   9611   }
   9612   my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8),
   9613                             r.val | (b << 14));
   9614   return my_r;
   9615 }
   9616 
   9617 #line 1 "upb/json/parser.rl"
   9618 /*
   9619 ** upb::json::Parser (upb_json_parser)
   9620 **
   9621 ** A parser that uses the Ragel State Machine Compiler to generate
   9622 ** the finite automata.
   9623 **
   9624 ** Ragel only natively handles regular languages, but we can manually
   9625 ** program it a bit to handle context-free languages like JSON, by using
   9626 ** the "fcall" and "fret" constructs.
   9627 **
   9628 ** This parser can handle the basics, but needs several things to be fleshed
   9629 ** out:
   9630 **
   9631 ** - handling of unicode escape sequences (including high surrogate pairs).
   9632 ** - properly check and report errors for unknown fields, stack overflow,
   9633 **   improper array nesting (or lack of nesting).
   9634 ** - handling of base64 sequences with padding characters.
   9635 ** - handling of push-back (non-success returns from sink functions).
   9636 ** - handling of keys/escape-sequences/etc that span input buffers.
   9637 */
   9638 
   9639 #include <stdio.h>
   9640 #include <stdint.h>
   9641 #include <assert.h>
   9642 #include <string.h>
   9643 #include <stdlib.h>
   9644 #include <errno.h>
   9645 
   9646 
   9647 #define UPB_JSON_MAX_DEPTH 64  /* Size of both the frame stack and Ragel's parser_stack in upb_json_parser. */
   9648 
   9649 typedef struct {
          /* One entry of the parser's scope stack (upb_json_parser.stack). */

          /* Sink that receives the values parsed at this level; for example,
           * base64_push() writes decoded bytes to p->top->sink. */
   9650   upb_sink sink;
   9651
   9652   /* The current message in which we're parsing, and the field whose value we're
   9653    * expecting next. */
   9654   const upb_msgdef *m;
   9655   const upb_fielddef *f;
   9656
   9657   /* We are in a repeated-field context, ready to emit mapentries as
   9658    * submessages. This flag alters the start-of-object (open-brace) behavior to
   9659    * begin a sequence of mapentry messages rather than a single submessage. */
   9660   bool is_map;
   9661
   9662   /* We are in a map-entry message context. This flag is set when parsing the
   9663    * value field of a single map entry and indicates to all value-field parsers
   9664    * (subobjects, strings, numbers, and bools) that the map-entry submessage
   9665    * should end as soon as the value is parsed. */
   9666   bool is_mapentry;
   9667
   9668   /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   9669    * message's map field that we're currently parsing. This differs from |f|
   9670    * because |f| is the field in the *current* message (i.e., the map-entry
   9671    * message itself), not the parent's field that leads to this map. */
   9672   const upb_fielddef *mapfield;
   9673 } upb_jsonparser_frame;
   9674 
   9675 struct upb_json_parser {
          /* Environment that errors are reported to (see check_stack()). */
   9676   upb_env *env;
          /* NOTE(review): presumably the byte handler and byte sink through which
           * JSON text is fed to the parser -- confirm where they are initialized. */
   9677   upb_byteshandler input_handler_;
   9678   upb_bytessink input_;
   9679
   9680   /* Stack to track the JSON scopes we are in. */
   9681   upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
          /* |top| is the current frame; check_stack() refuses to go deeper once
           * top + 1 reaches |limit|. */
   9682   upb_jsonparser_frame *top;
   9683   upb_jsonparser_frame *limit;
   9684
          /* Holds the most recent error message before it is reported to |env|. */
   9685   upb_status status;
   9686
   9687   /* Ragel's internal parsing stack for the parsing state machine. */
   9688   int current_state;
   9689   int parser_stack[UPB_JSON_MAX_DEPTH];
   9690   int parser_top;
   9691
   9692   /* The handle for the current buffer. */
   9693   const upb_bufhandle *handle;
   9694
   9695   /* Accumulate buffer.  See details in parser.rl. */
   9696   const char *accumulated;
   9697   size_t accumulated_len;
   9698   char *accumulate_buf;
   9699   size_t accumulate_buf_size;
   9700
   9701   /* Multi-part text data.  See details in parser.rl. */
   9702   int multipart_state;
   9703   upb_selector_t string_selector;
   9704
   9705   /* Input capture.  See details in parser.rl. */
   9706   const char *capture;
   9707
   9708   /* Intermediate result of parsing a unicode escape sequence. */
   9709   uint32_t digit;
   9710 };
   9711 
   9712 #define PARSER_CHECK_RETURN(x) if (!(x)) return false  /* Returns false from the enclosing function when x is false. */
   9713
   9714 /* Used to signal that a capture has been suspended. */
        /* NOTE(review): the variable is never assigned a value here; presumably only
         * its address is used as a sentinel for the |capture| pointer -- confirm. */
   9715 static char suspend_capture;
   9716 
   9717 static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
   9718                                              upb_handlertype_t type) {
   9719   upb_selector_t sel;
   9720   bool ok = upb_handlers_getselector(p->top->f, type, &sel);
   9721   UPB_ASSERT_VAR(ok, ok);
   9722   return sel;
   9723 }
   9724 
   9725 static upb_selector_t parser_getsel(upb_json_parser *p) {
   9726   return getsel_for_handlertype(
   9727       p, upb_handlers_getprimitivehandlertype(p->top->f));
   9728 }
   9729 
   9730 static bool check_stack(upb_json_parser *p) {
   9731   if ((p->top + 1) == p->limit) {
   9732     upb_status_seterrmsg(&p->status, "Nesting too deep");
   9733     upb_env_reporterror(p->env, &p->status);
   9734     return false;
   9735   }
   9736 
   9737   return true;
   9738 }
   9739 
   9740 /* There are GCC/Clang built-ins for overflow checking which we could start
   9741  * using if there was any performance benefit to it. */
   9742 
   9743 static bool checked_add(size_t a, size_t b, size_t *c) {
   9744   if (SIZE_MAX - a < b) return false;
   9745   *c = a + b;
   9746   return true;
   9747 }
   9748 
   9749 static size_t saturating_multiply(size_t a, size_t b) {
   9750   /* size_t is unsigned, so this is defined behavior even on overflow. */
   9751   size_t ret = a * b;
   9752   if (b != 0 && ret / b != a) {
   9753     ret = SIZE_MAX;
   9754   }
   9755   return ret;
   9756 }
   9757 
   9758 
   9759 /* Base64 decoding ************************************************************/
   9760 
   9761 /* TODO(haberman): make this streaming. */
   9762 
   9763 static const signed char b64table[] = {
          /* 256 entries, indexed by byte value: the 6-bit base64 value of that
           * character, or -1 if the byte is not in the base64 alphabet.  The padding
           * character '=' also maps to -1.  (The "07" entry is an octal literal with
           * value 7.) */
   9764   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9765   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9766   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9767   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9768   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9769   -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
   9770   52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
   9771   60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
   9772   -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
   9773   07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
   9774   15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
   9775   23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
   9776   -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
   9777   33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
   9778   41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
   9779   49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
   9780   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9781   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9782   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9783   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9784   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9785   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9786   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9787   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9788   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9789   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9790   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9791   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9792   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9793   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9794   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
   9795   -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
   9796 };
   9797 
   9798 /* Returns the table value sign-extended to 32 bits.  Knowing that the upper
   9799  * bits will be 1 for unrecognized characters makes it easier to check for
   9800  * this error condition later (see below). */
   9801 int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
   9802 
   9803 /* Returns true if the given character is not a valid base64 character or
   9804  * padding. */
   9805 bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; }
   9806 
   9807 static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
   9808                         size_t len) {
   9809   const char *limit = ptr + len;
   9810   for (; ptr < limit; ptr += 4) {
   9811     uint32_t val;
   9812     char output[3];
   9813 
   9814     if (limit - ptr < 4) {
   9815       upb_status_seterrf(&p->status,
   9816                          "Base64 input for bytes field not a multiple of 4: %s",
   9817                          upb_fielddef_name(p->top->f));
   9818       upb_env_reporterror(p->env, &p->status);
   9819       return false;
   9820     }
   9821 
   9822     val = b64lookup(ptr[0]) << 18 |
   9823           b64lookup(ptr[1]) << 12 |
   9824           b64lookup(ptr[2]) << 6  |
   9825           b64lookup(ptr[3]);
   9826 
   9827     /* Test the upper bit; returns true if any of the characters returned -1. */
   9828     if (val & 0x80000000) {
   9829       goto otherchar;
   9830     }
   9831 
   9832     output[0] = val >> 16;
   9833     output[1] = (val >> 8) & 0xff;
   9834     output[2] = val & 0xff;
   9835     upb_sink_putstring(&p->top->sink, sel, output, 3, NULL);
   9836   }
   9837   return true;
   9838 
   9839 otherchar:
   9840   if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
   9841       nonbase64(ptr[3]) ) {
   9842     upb_status_seterrf(&p->status,
   9843                        "Non-base64 characters in bytes field: %s",
   9844                        upb_fielddef_name(p->top->f));
   9845     upb_env_reporterror(p->env, &p->status);
   9846     return false;
   9847   } if (ptr[2] == '=') {
   9848     uint32_t val;
   9849     char output;
   9850 
   9851     /* Last group contains only two input bytes, one output byte. */
   9852     if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
   9853       goto badpadding;
   9854     }
   9855 
   9856     val = b64lookup(ptr[0]) << 18 |
   9857           b64lookup(ptr[1]) << 12;
   9858 
   9859     assert(!(val & 0x80000000));
   9860     output = val >> 16;
   9861     upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL);
   9862     return true;
   9863   } else {
   9864     uint32_t val;
   9865     char output[2];
   9866 
   9867     /* Last group contains only three input bytes, two output bytes. */
   9868     if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
   9869       goto badpadding;
   9870     }
   9871 
   9872     val = b64lookup(ptr[0]) << 18 |
   9873           b64lookup(ptr[1]) << 12 |
   9874           b64lookup(ptr[2]) << 6;
   9875 
   9876     output[0] = val >> 16;
   9877     output[1] = (val >> 8) & 0xff;
   9878     upb_sink_putstring(&p->top->sink, sel, output, 2, NULL);
   9879     return true;
   9880   }
   9881 
   9882 badpadding:
   9883   upb_status_seterrf(&p->status,
   9884                      "Incorrect base64 padding for field: %s (%.*s)",
   9885                      upb_fielddef_name(p->top->f),
   9886                      4, ptr);
   9887   upb_env_reporterror(p->env, &p->status);
   9888   return false;
   9889 }
   9890 
   9891 
   9892 /* Accumulate buffer **********************************************************/
   9893 
   9894 /* Functionality for accumulating a buffer.
   9895  *
   9896  * Some parts of the parser need an entire value as a contiguous string.  For
   9897  * example, to look up a member name in a hash table, or to turn a string into
   9898  * a number, the relevant library routines need the input string to be in
   9899  * contiguous memory, even if the value spanned two or more buffers in the
   9900  * input.  These routines handle that.
   9901  *
   9902  * In the common case we can just point to the input buffer to get this
   9903  * contiguous string and avoid any actual copy.  So we optimistically begin
   9904  * this way.  But there are a few cases where we must instead copy into a
   9905  * separate buffer:
   9906  *
   9907  *   1. The string was not contiguous in the input (it spanned buffers).
   9908  *
   9909  *   2. The string included escape sequences that need to be interpreted to get
   9910  *      the true value in a contiguous buffer. */
   9911 
/* Debug check: asserts that no data is currently accumulated, i.e. that
 * p->accumulated is NULL and p->accumulated_len is zero. */
static void assert_accumulate_empty(upb_json_parser *p) {
  /* |p| is otherwise unreferenced when the asserts are compiled out. */
  UPB_UNUSED(p);
  assert(p->accumulated == NULL);
  assert(p->accumulated_len == 0);
}
   9917 
   9918 static void accumulate_clear(upb_json_parser *p) {
   9919   p->accumulated = NULL;
   9920   p->accumulated_len = 0;
   9921 }
   9922 
   9923 /* Used internally by accumulate_append(). */
   9924 static bool accumulate_realloc(upb_json_parser *p, size_t need) {
   9925   void *mem;
   9926   size_t old_size = p->accumulate_buf_size;
   9927   size_t new_size = UPB_MAX(old_size, 128);
   9928   while (new_size < need) {
   9929     new_size = saturating_multiply(new_size, 2);
   9930   }
   9931 
   9932   mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size);
   9933   if (!mem) {
   9934     upb_status_seterrmsg(&p->status, "Out of memory allocating buffer.");
   9935     upb_env_reporterror(p->env, &p->status);
   9936     return false;
   9937   }
   9938 
   9939   p->accumulate_buf = mem;
   9940   p->accumulate_buf_size = new_size;
   9941   return true;
   9942 }
   9943 
   9944 /* Logically appends the given data to the append buffer.
   9945  * If "can_alias" is true, we will try to avoid actually copying, but the buffer
   9946  * must be valid until the next accumulate_append() call (if any). */
   9947 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
   9948                               bool can_alias) {
   9949   size_t need;
   9950 
   9951   if (!p->accumulated && can_alias) {
   9952     p->accumulated = buf;
   9953     p->accumulated_len = len;
   9954     return true;
   9955   }
   9956 
   9957   if (!checked_add(p->accumulated_len, len, &need)) {
   9958     upb_status_seterrmsg(&p->status, "Integer overflow.");
   9959     upb_env_reporterror(p->env, &p->status);
   9960     return false;
   9961   }
   9962 
   9963   if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
   9964     return false;
   9965   }
   9966 
   9967   if (p->accumulated != p->accumulate_buf) {
   9968     memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
   9969     p->accumulated = p->accumulate_buf;
   9970   }
   9971 
   9972   memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
   9973   p->accumulated_len += len;
   9974   return true;
   9975 }
   9976 
   9977 /* Returns a pointer to the data accumulated since the last accumulate_clear()
   9978  * call, and writes the length to *len.  This with point either to the input
   9979  * buffer or a temporary accumulate buffer. */
   9980 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
   9981   assert(p->accumulated);
   9982   *len = p->accumulated_len;
   9983   return p->accumulated;
   9984 }
   9985 
   9986 
/* Multi-part text data *******************************************************/
   9988 
   9989 /* When we have text data in the input, it can often come in multiple segments.
   9990  * For example, there may be some raw string data followed by an escape
   9991  * sequence.  The two segments are processed with different logic.  Also buffer
   9992  * seams in the input can cause multiple segments.
   9993  *
   9994  * As we see segments, there are two main cases for how we want to process them:
   9995  *
   9996  *  1. we want to push the captured input directly to string handlers.
   9997  *
   9998  *  2. we need to accumulate all the parts into a contiguous buffer for further
   9999  *     processing (field name lookup, string->number conversion, etc). */
   10000 
/* This is the set of states for p->multipart_state.  multipart_startaccum()
 * and multipart_start() move from INACTIVE to one of the active states;
 * multipart_end() moves back to INACTIVE. */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
   10014 
   10015 /* Start a multi-part text value where we accumulate the data for processing at
   10016  * the end. */
   10017 static void multipart_startaccum(upb_json_parser *p) {
   10018   assert_accumulate_empty(p);
   10019   assert(p->multipart_state == MULTIPART_INACTIVE);
   10020   p->multipart_state = MULTIPART_ACCUMULATE;
   10021 }
   10022 
   10023 /* Start a multi-part text value where we immediately push text data to a string
   10024  * value with the given selector. */
   10025 static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
   10026   assert_accumulate_empty(p);
   10027   assert(p->multipart_state == MULTIPART_INACTIVE);
   10028   p->multipart_state = MULTIPART_PUSHEAGERLY;
   10029   p->string_selector = sel;
   10030 }
   10031 
   10032 static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
   10033                            bool can_alias) {
   10034   switch (p->multipart_state) {
   10035     case MULTIPART_INACTIVE:
   10036       upb_status_seterrmsg(
   10037           &p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
   10038       upb_env_reporterror(p->env, &p->status);
   10039       return false;
   10040 
   10041     case MULTIPART_ACCUMULATE:
   10042       if (!accumulate_append(p, buf, len, can_alias)) {
   10043         return false;
   10044       }
   10045       break;
   10046 
   10047     case MULTIPART_PUSHEAGERLY: {
   10048       const upb_bufhandle *handle = can_alias ? p->handle : NULL;
   10049       upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle);
   10050       break;
   10051     }
   10052   }
   10053 
   10054   return true;
   10055 }
   10056 
   10057 /* Note: this invalidates the accumulate buffer!  Call only after reading its
   10058  * contents. */
   10059 static void multipart_end(upb_json_parser *p) {
   10060   assert(p->multipart_state != MULTIPART_INACTIVE);
   10061   p->multipart_state = MULTIPART_INACTIVE;
   10062   accumulate_clear(p);
   10063 }
   10064 
   10065 
   10066 /* Input capture **************************************************************/
   10067 
   10068 /* Functionality for capturing a region of the input as text.  Gracefully
   10069  * handles the case where a buffer seam occurs in the middle of the captured
   10070  * region. */
   10071 
/* Starts capturing input at |ptr|.  A multi-part value must already be active
 * and no capture may be in progress (both asserted). */
static void capture_begin(upb_json_parser *p, const char *ptr) {
  assert(p->multipart_state != MULTIPART_INACTIVE);
  assert(p->capture == NULL);
  p->capture = ptr;
}
   10077 
   10078 static bool capture_end(upb_json_parser *p, const char *ptr) {
   10079   assert(p->capture);
   10080   if (multipart_text(p, p->capture, ptr - p->capture, true)) {
   10081     p->capture = NULL;
   10082     return true;
   10083   } else {
   10084     return false;
   10085   }
   10086 }
   10087 
   10088 /* This is called at the end of each input buffer (ie. when we have hit a
   10089  * buffer seam).  If we are in the middle of capturing the input, this
   10090  * processes the unprocessed capture region. */
   10091 static void capture_suspend(upb_json_parser *p, const char **ptr) {
   10092   if (!p->capture) return;
   10093 
   10094   if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
   10095     /* We use this as a signal that we were in the middle of capturing, and
   10096      * that capturing should resume at the beginning of the next buffer.
   10097      *
   10098      * We can't use *ptr here, because we have no guarantee that this pointer
   10099      * will be valid when we resume (if the underlying memory is freed, then
   10100      * using the pointer at all, even to compare to NULL, is likely undefined
   10101      * behavior). */
   10102     p->capture = &suspend_capture;
   10103   } else {
   10104     /* Need to back up the pointer to the beginning of the capture, since
   10105      * we were not able to actually preserve it. */
   10106     *ptr = p->capture;
   10107   }
   10108 }
   10109 
   10110 static void capture_resume(upb_json_parser *p, const char *ptr) {
   10111   if (p->capture) {
   10112     assert(p->capture == &suspend_capture);
   10113     p->capture = ptr;
   10114   }
   10115 }
   10116 
   10117 
   10118 /* Callbacks from the parser **************************************************/
   10119 
   10120 /* These are the functions called directly from the parser itself.
   10121  * We define these in the same order as their declarations in the parser. */
   10122 
   10123 static char escape_char(char in) {
   10124   switch (in) {
   10125     case 'r': return '\r';
   10126     case 't': return '\t';
   10127     case 'n': return '\n';
   10128     case 'f': return '\f';
   10129     case 'b': return '\b';
   10130     case '/': return '/';
   10131     case '"': return '"';
   10132     case '\\': return '\\';
   10133     default:
   10134       assert(0);
   10135       return 'x';
   10136   }
   10137 }
   10138 
   10139 static bool escape(upb_json_parser *p, const char *ptr) {
   10140   char ch = escape_char(*ptr);
   10141   return multipart_text(p, &ch, 1, false);
   10142 }
   10143 
/* Resets the hex accumulator p->digit to zero; hexdigit() then shifts digits
 * into it and end_hex() consumes the result. */
static void start_hex(upb_json_parser *p) {
  p->digit = 0;
}
   10147 
   10148 static void hexdigit(upb_json_parser *p, const char *ptr) {
   10149   char ch = *ptr;
   10150 
   10151   p->digit <<= 4;
   10152 
   10153   if (ch >= '0' && ch <= '9') {
   10154     p->digit += (ch - '0');
   10155   } else if (ch >= 'a' && ch <= 'f') {
   10156     p->digit += ((ch - 'a') + 10);
   10157   } else {
   10158     assert(ch >= 'A' && ch <= 'F');
   10159     p->digit += ((ch - 'A') + 10);
   10160   }
   10161 }
   10162 
   10163 static bool end_hex(upb_json_parser *p) {
   10164   uint32_t codepoint = p->digit;
   10165 
   10166   /* emit the codepoint as UTF-8. */
   10167   char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
   10168   int length = 0;
   10169   if (codepoint <= 0x7F) {
   10170     utf8[0] = codepoint;
   10171     length = 1;
   10172   } else if (codepoint <= 0x07FF) {
   10173     utf8[1] = (codepoint & 0x3F) | 0x80;
   10174     codepoint >>= 6;
   10175     utf8[0] = (codepoint & 0x1F) | 0xC0;
   10176     length = 2;
   10177   } else /* codepoint <= 0xFFFF */ {
   10178     utf8[2] = (codepoint & 0x3F) | 0x80;
   10179     codepoint >>= 6;
   10180     utf8[1] = (codepoint & 0x3F) | 0x80;
   10181     codepoint >>= 6;
   10182     utf8[0] = (codepoint & 0x0F) | 0xE0;
   10183     length = 3;
   10184   }
   10185   /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
   10186    * we have to wait for the next escape to get the full code point). */
   10187 
   10188   return multipart_text(p, utf8, length, false);
   10189 }
   10190 
/* Parser callback: a run of text begins at |ptr|; start capturing it. */
static void start_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
   10194 
/* Parser callback: the run of text ends at |ptr|; finish the capture and
 * return whether the captured text was handled successfully. */
static bool end_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
   10198 
/* Parser callback: a number begins at |ptr|.  Its text is accumulated (not
 * pushed eagerly) so that it can be converted once it is complete. */
static void start_number(upb_json_parser *p, const char *ptr) {
  /* The multipart value must be active before capture_begin() is called. */
  multipart_startaccum(p);
  capture_begin(p, ptr);
}
   10203 
   10204 static bool parse_number(upb_json_parser *p);
   10205 
   10206 static bool end_number(upb_json_parser *p, const char *ptr) {
   10207   if (!capture_end(p, ptr)) {
   10208     return false;
   10209   }
   10210 
   10211   return parse_number(p);
   10212 }
   10213 
   10214 static bool parse_number(upb_json_parser *p) {
   10215   size_t len;
   10216   const char *buf;
   10217   const char *myend;
   10218   char *end;
   10219 
   10220   /* strtol() and friends unfortunately do not support specifying the length of
   10221    * the input string, so we need to force a copy into a NULL-terminated buffer. */
   10222   if (!multipart_text(p, "\0", 1, false)) {
   10223     return false;
   10224   }
   10225 
   10226   buf = accumulate_getptr(p, &len);
   10227   myend = buf + len - 1;  /* One for NULL. */
   10228 
   10229   /* XXX: We are using strtol to parse integers, but this is wrong as even
   10230    * integers can be represented as 1e6 (for example), which strtol can't
   10231    * handle correctly.
   10232    *
   10233    * XXX: Also, we can't handle large integers properly because strto[u]ll
   10234    * isn't in C89.
   10235    *
   10236    * XXX: Also, we don't properly check floats for overflow, since strtof
   10237    * isn't in C89. */
   10238   switch (upb_fielddef_type(p->top->f)) {
   10239     case UPB_TYPE_ENUM:
   10240     case UPB_TYPE_INT32: {
   10241       long val = strtol(p->accumulated, &end, 0);
   10242       if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend)
   10243         goto err;
   10244       else
   10245         upb_sink_putint32(&p->top->sink, parser_getsel(p), val);
   10246       break;
   10247     }
   10248     case UPB_TYPE_INT64: {
   10249       long long val = strtol(p->accumulated, &end, 0);
   10250       if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend)
   10251         goto err;
   10252       else
   10253         upb_sink_putint64(&p->top->sink, parser_getsel(p), val);
   10254       break;
   10255     }
   10256     case UPB_TYPE_UINT32: {
   10257       unsigned long val = strtoul(p->accumulated, &end, 0);
   10258       if (val > UINT32_MAX || errno == ERANGE || end != myend)
   10259         goto err;
   10260       else
   10261         upb_sink_putuint32(&p->top->sink, parser_getsel(p), val);
   10262       break;
   10263     }
   10264     case UPB_TYPE_UINT64: {
   10265       unsigned long long val = strtoul(p->accumulated, &end, 0);
   10266       if (val > UINT64_MAX || errno == ERANGE || end != myend)
   10267         goto err;
   10268       else
   10269         upb_sink_putuint64(&p->top->sink, parser_getsel(p), val);
   10270       break;
   10271     }
   10272     case UPB_TYPE_DOUBLE: {
   10273       double val = strtod(p->accumulated, &end);
   10274       if (errno == ERANGE || end != myend)
   10275         goto err;
   10276       else
   10277         upb_sink_putdouble(&p->top->sink, parser_getsel(p), val);
   10278       break;
   10279     }
   10280     case UPB_TYPE_FLOAT: {
   10281       float val = strtod(p->accumulated, &end);
   10282       if (errno == ERANGE || end != myend)
   10283         goto err;
   10284       else
   10285         upb_sink_putfloat(&p->top->sink, parser_getsel(p), val);
   10286       break;
   10287     }
   10288     default:
   10289       assert(false);
   10290   }
   10291 
   10292   multipart_end(p);
   10293 
   10294   return true;
   10295 
   10296 err:
   10297   upb_status_seterrf(&p->status, "error parsing number: %s", buf);
   10298   upb_env_reporterror(p->env, &p->status);
   10299   multipart_end(p);
   10300   return false;
   10301 }
   10302 
   10303 static bool parser_putbool(upb_json_parser *p, bool val) {
   10304   bool ok;
   10305 
   10306   if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
   10307     upb_status_seterrf(&p->status,
   10308                        "Boolean value specified for non-bool field: %s",
   10309                        upb_fielddef_name(p->top->f));
   10310     upb_env_reporterror(p->env, &p->status);
   10311     return false;
   10312   }
   10313 
   10314   ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val);
   10315   UPB_ASSERT_VAR(ok, ok);
   10316 
   10317   return true;
   10318 }
   10319 
   10320 static bool start_stringval(upb_json_parser *p) {
   10321   assert(p->top->f);
   10322 
   10323   if (upb_fielddef_isstring(p->top->f)) {
   10324     upb_jsonparser_frame *inner;
   10325     upb_selector_t sel;
   10326 
   10327     if (!check_stack(p)) return false;
   10328 
   10329     /* Start a new parser frame: parser frames correspond one-to-one with
   10330      * handler frames, and string events occur in a sub-frame. */
   10331     inner = p->top + 1;
   10332     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
   10333     upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink);
   10334     inner->m = p->top->m;
   10335     inner->f = p->top->f;
   10336     inner->is_map = false;
   10337     inner->is_mapentry = false;
   10338     p->top = inner;
   10339 
   10340     if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
   10341       /* For STRING fields we push data directly to the handlers as it is
   10342        * parsed.  We don't do this yet for BYTES fields, because our base64
   10343        * decoder is not streaming.
   10344        *
   10345        * TODO(haberman): make base64 decoding streaming also. */
   10346       multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
   10347       return true;
   10348     } else {
   10349       multipart_startaccum(p);
   10350       return true;
   10351     }
   10352   } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) {
   10353     /* No need to push a frame -- symbolic enum names in quotes remain in the
   10354      * current parser frame.
   10355      *
   10356      * Enum string values must accumulate so we can look up the value in a table
   10357      * once it is complete. */
   10358     multipart_startaccum(p);
   10359     return true;
   10360   } else {
   10361     upb_status_seterrf(&p->status,
   10362                        "String specified for non-string/non-enum field: %s",
   10363                        upb_fielddef_name(p->top->f));
   10364     upb_env_reporterror(p->env, &p->status);
   10365     return false;
   10366   }
   10367 }
   10368 
   10369 static bool end_stringval(upb_json_parser *p) {
   10370   bool ok = true;
   10371 
   10372   switch (upb_fielddef_type(p->top->f)) {
   10373     case UPB_TYPE_BYTES:
   10374       if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
   10375                        p->accumulated, p->accumulated_len)) {
   10376         return false;
   10377       }
   10378       /* Fall through. */
   10379 
   10380     case UPB_TYPE_STRING: {
   10381       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
   10382       upb_sink_endstr(&p->top->sink, sel);
   10383       p->top--;
   10384       break;
   10385     }
   10386 
   10387     case UPB_TYPE_ENUM: {
   10388       /* Resolve enum symbolic name to integer value. */
   10389       const upb_enumdef *enumdef =
   10390           (const upb_enumdef*)upb_fielddef_subdef(p->top->f);
   10391 
   10392       size_t len;
   10393       const char *buf = accumulate_getptr(p, &len);
   10394 
   10395       int32_t int_val = 0;
   10396       ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
   10397 
   10398       if (ok) {
   10399         upb_selector_t sel = parser_getsel(p);
   10400         upb_sink_putint32(&p->top->sink, sel, int_val);
   10401       } else {
   10402         upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf);
   10403         upb_env_reporterror(p->env, &p->status);
   10404       }
   10405 
   10406       break;
   10407     }
   10408 
   10409     default:
   10410       assert(false);
   10411       upb_status_seterrmsg(&p->status, "Internal error in JSON decoder");
   10412       upb_env_reporterror(p->env, &p->status);
   10413       ok = false;
   10414       break;
   10415   }
   10416 
   10417   multipart_end(p);
   10418 
   10419   return ok;
   10420 }
   10421 
/* Parser callback: an object member name begins.  The current frame must have
 * no field selected yet (asserted); the name is accumulated so that it can be
 * looked up once it is complete. */
static void start_member(upb_json_parser *p) {
  assert(!p->top->f);
  multipart_startaccum(p);
}
   10426 
   10427 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key
   10428  * field based on the current contents of the accumulate buffer. */
   10429 static bool parse_mapentry_key(upb_json_parser *p) {
   10430 
   10431   size_t len;
   10432   const char *buf = accumulate_getptr(p, &len);
   10433 
   10434   /* Emit the key field. We do a bit of ad-hoc parsing here because the
   10435    * parser state machine has already decided that this is a string field
   10436    * name, and we are reinterpreting it as some arbitrary key type. In
   10437    * particular, integer and bool keys are quoted, so we need to parse the
   10438    * quoted string contents here. */
   10439 
   10440   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
   10441   if (p->top->f == NULL) {
   10442     upb_status_seterrmsg(&p->status, "mapentry message has no key");
   10443     upb_env_reporterror(p->env, &p->status);
   10444     return false;
   10445   }
   10446   switch (upb_fielddef_type(p->top->f)) {
   10447     case UPB_TYPE_INT32:
   10448     case UPB_TYPE_INT64:
   10449     case UPB_TYPE_UINT32:
   10450     case UPB_TYPE_UINT64:
   10451       /* Invoke end_number. The accum buffer has the number's text already. */
   10452       if (!parse_number(p)) {
   10453         return false;
   10454       }
   10455       break;
   10456     case UPB_TYPE_BOOL:
   10457       if (len == 4 && !strncmp(buf, "true", 4)) {
   10458         if (!parser_putbool(p, true)) {
   10459           return false;
   10460         }
   10461       } else if (len == 5 && !strncmp(buf, "false", 5)) {
   10462         if (!parser_putbool(p, false)) {
   10463           return false;
   10464         }
   10465       } else {
   10466         upb_status_seterrmsg(&p->status,
   10467                              "Map bool key not 'true' or 'false'");
   10468         upb_env_reporterror(p->env, &p->status);
   10469         return false;
   10470       }
   10471       multipart_end(p);
   10472       break;
   10473     case UPB_TYPE_STRING:
   10474     case UPB_TYPE_BYTES: {
   10475       upb_sink subsink;
   10476       upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
   10477       upb_sink_startstr(&p->top->sink, sel, len, &subsink);
   10478       sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
   10479       upb_sink_putstring(&subsink, sel, buf, len, NULL);
   10480       sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
   10481       upb_sink_endstr(&subsink, sel);
   10482       multipart_end(p);
   10483       break;
   10484     }
   10485     default:
   10486       upb_status_seterrmsg(&p->status, "Invalid field type for map key");
   10487       upb_env_reporterror(p->env, &p->status);
   10488       return false;
   10489   }
   10490 
   10491   return true;
   10492 }
   10493 
   10494 /* Helper: emit one map entry (as a submessage in the map field sequence). This
   10495  * is invoked from end_membername(), at the end of the map entry's key string,
   10496  * with the map key in the accumulate buffer. It parses the key from that
   10497  * buffer, emits the handler calls to start the mapentry submessage (setting up
   10498  * its subframe in the process), and sets up state in the subframe so that the
   10499  * value parser (invoked next) will emit the mapentry's value field and then
   10500  * end the mapentry message. */
   10501 
   10502 static bool handle_mapentry(upb_json_parser *p) {
   10503   const upb_fielddef *mapfield;
   10504   const upb_msgdef *mapentrymsg;
   10505   upb_jsonparser_frame *inner;
   10506   upb_selector_t sel;
   10507 
   10508   /* Map entry: p->top->sink is the seq frame, so we need to start a frame
   10509    * for the mapentry itself, and then set |f| in that frame so that the map
   10510    * value field is parsed, and also set a flag to end the frame after the
   10511    * map-entry value is parsed. */
   10512   if (!check_stack(p)) return false;
   10513 
   10514   mapfield = p->top->mapfield;
   10515   mapentrymsg = upb_fielddef_msgsubdef(mapfield);
   10516 
   10517   inner = p->top + 1;
   10518   p->top->f = mapfield;
   10519   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
   10520   upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
   10521   inner->m = mapentrymsg;
   10522   inner->mapfield = mapfield;
   10523   inner->is_map = false;
   10524 
   10525   /* Don't set this to true *yet* -- we reuse parsing handlers below to push
   10526    * the key field value to the sink, and these handlers will pop the frame
   10527    * if they see is_mapentry (when invoked by the parser state machine, they
   10528    * would have just seen the map-entry value, not key). */
   10529   inner->is_mapentry = false;
   10530   p->top = inner;
   10531 
   10532   /* send STARTMSG in submsg frame. */
   10533   upb_sink_startmsg(&p->top->sink);
   10534 
   10535   parse_mapentry_key(p);
   10536 
   10537   /* Set up the value field to receive the map-entry value. */
   10538   p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
   10539   p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
   10540   p->top->mapfield = mapfield;
   10541   if (p->top->f == NULL) {
   10542     upb_status_seterrmsg(&p->status, "mapentry message has no value");
   10543     upb_env_reporterror(p->env, &p->status);
   10544     return false;
   10545   }
   10546 
   10547   return true;
   10548 }
   10549 
   10550 static bool end_membername(upb_json_parser *p) {
   10551   assert(!p->top->f);
   10552 
   10553   if (p->top->is_map) {
   10554     return handle_mapentry(p);
   10555   } else {
   10556     size_t len;
   10557     const char *buf = accumulate_getptr(p, &len);
   10558     const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len);
   10559 
   10560     if (!f) {
   10561       /* TODO(haberman): Ignore unknown fields if requested/configured to do
   10562        * so. */
   10563       upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf);
   10564       upb_env_reporterror(p->env, &p->status);
   10565       return false;
   10566     }
   10567 
   10568     p->top->f = f;
   10569     multipart_end(p);
   10570 
   10571     return true;
   10572   }
   10573 }
   10574 
   10575 static void end_member(upb_json_parser *p) {
   10576   /* If we just parsed a map-entry value, end that frame too. */
   10577   if (p->top->is_mapentry) {
   10578     upb_status s = UPB_STATUS_INIT;
   10579     upb_selector_t sel;
   10580     bool ok;
   10581     const upb_fielddef *mapfield;
   10582 
   10583     assert(p->top > p->stack);
   10584     /* send ENDMSG on submsg. */
   10585     upb_sink_endmsg(&p->top->sink, &s);
   10586     mapfield = p->top->mapfield;
   10587 
   10588     /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
   10589     p->top--;
   10590     ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
   10591     UPB_ASSERT_VAR(ok, ok);
   10592     upb_sink_endsubmsg(&p->top->sink, sel);
   10593   }
   10594 
   10595   p->top->f = NULL;
   10596 }
   10597 
   10598 static bool start_subobject(upb_json_parser *p) {
   10599   assert(p->top->f);
   10600 
   10601   if (upb_fielddef_ismap(p->top->f)) {
   10602     upb_jsonparser_frame *inner;
   10603     upb_selector_t sel;
   10604 
   10605     /* Beginning of a map. Start a new parser frame in a repeated-field
   10606      * context. */
   10607     if (!check_stack(p)) return false;
   10608 
   10609     inner = p->top + 1;
   10610     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
   10611     upb_sink_startseq(&p->top->sink, sel, &inner->sink);
   10612     inner->m = upb_fielddef_msgsubdef(p->top->f);
   10613     inner->mapfield = p->top->f;
   10614     inner->f = NULL;
   10615     inner->is_map = true;
   10616     inner->is_mapentry = false;
   10617     p->top = inner;
   10618 
   10619     return true;
   10620   } else if (upb_fielddef_issubmsg(p->top->f)) {
   10621     upb_jsonparser_frame *inner;
   10622     upb_selector_t sel;
   10623 
   10624     /* Beginning of a subobject. Start a new parser frame in the submsg
   10625      * context. */
   10626     if (!check_stack(p)) return false;
   10627 
   10628     inner = p->top + 1;
   10629 
   10630     sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
   10631     upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink);
   10632     inner->m = upb_fielddef_msgsubdef(p->top->f);
   10633     inner->f = NULL;
   10634     inner->is_map = false;
   10635     inner->is_mapentry = false;
   10636     p->top = inner;
   10637 
   10638     return true;
   10639   } else {
   10640     upb_status_seterrf(&p->status,
   10641                        "Object specified for non-message/group field: %s",
   10642                        upb_fielddef_name(p->top->f));
   10643     upb_env_reporterror(p->env, &p->status);
   10644     return false;
   10645   }
   10646 }
   10647 
   10648 static void end_subobject(upb_json_parser *p) {
   10649   if (p->top->is_map) {
   10650     upb_selector_t sel;
   10651     p->top--;
   10652     sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
   10653     upb_sink_endseq(&p->top->sink, sel);
   10654   } else {
   10655     upb_selector_t sel;
   10656     p->top--;
   10657     sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
   10658     upb_sink_endsubmsg(&p->top->sink, sel);
   10659   }
   10660 }
   10661 
   10662 static bool start_array(upb_json_parser *p) {
   10663   upb_jsonparser_frame *inner;
   10664   upb_selector_t sel;
   10665 
   10666   assert(p->top->f);
   10667 
   10668   if (!upb_fielddef_isseq(p->top->f)) {
   10669     upb_status_seterrf(&p->status,
   10670                        "Array specified for non-repeated field: %s",
   10671                        upb_fielddef_name(p->top->f));
   10672     upb_env_reporterror(p->env, &p->status);
   10673     return false;
   10674   }
   10675 
   10676   if (!check_stack(p)) return false;
   10677 
   10678   inner = p->top + 1;
   10679   sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
   10680   upb_sink_startseq(&p->top->sink, sel, &inner->sink);
   10681   inner->m = p->top->m;
   10682   inner->f = p->top->f;
   10683   inner->is_map = false;
   10684   inner->is_mapentry = false;
   10685   p->top = inner;
   10686 
   10687   return true;
   10688 }
   10689 
   10690 static void end_array(upb_json_parser *p) {
   10691   upb_selector_t sel;
   10692 
   10693   assert(p->top > p->stack);
   10694 
   10695   p->top--;
   10696   sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
   10697   upb_sink_endseq(&p->top->sink, sel);
   10698 }
   10699 
   10700 static void start_object(upb_json_parser *p) {
   10701   if (!p->top->is_map) {
   10702     upb_sink_startmsg(&p->top->sink);
   10703   }
   10704 }
   10705 
   10706 static void end_object(upb_json_parser *p) {
   10707   if (!p->top->is_map) {
   10708     upb_status status;
   10709     upb_status_clear(&status);
   10710     upb_sink_endmsg(&p->top->sink, &status);
   10711     if (!upb_ok(&status)) {
   10712       upb_env_reporterror(p->env, &status);
   10713     }
   10714   }
   10715 }
   10716 
   10717 
   10718 #define CHECK_RETURN_TOP(x) if (!(x)) goto error
   10719 
   10720 
   10721 /* The actual parser **********************************************************/
   10722 
   10723 /* What follows is the Ragel parser itself.  The language is specified in Ragel
   10724  * and the actions call our C functions above.
   10725  *
   10726  * Ragel has an extensive set of functionality, and we use only a small part of
   10727  * it.  There are many action types but we only use a few:
   10728  *
   10729  *   ">" -- transition into a machine
   10730  *   "%" -- transition out of a machine
   10731  *   "@" -- transition into a final state of a machine.
   10732  *
   10733  * "@" transitions are tricky because a machine can transition into a final
   10734  * state repeatedly.  But in some cases we know this can't happen, for example
   10735  * a string which is delimited by a final '"' can only transition into its
   10736  * final state once, when the closing '"' is seen. */
   10737 
   10738 
   10739 #line 1218 "upb/json/parser.rl"
   10740 
   10741 
   10742 
   10743 #line 1130 "upb/json/parser.c"
         /* Ragel-generated tables for the JSON state machine.  They are consumed by
          * the transition lookup at the top of parse(); do not edit by hand.
          *
          * _json_actions: flattened action lists.  A non-zero entry of
          * _json_trans_actions is an offset into this array; the value at that
          * offset is a count, followed by that many action ids (the case labels of
          * the switch in parse()). */
   10744 static const char _json_actions[] = {
   10745 	0, 1, 0, 1, 2, 1, 3, 1,
   10746 	5, 1, 6, 1, 7, 1, 8, 1,
   10747 	10, 1, 12, 1, 13, 1, 14, 1,
   10748 	15, 1, 16, 1, 17, 1, 21, 1,
   10749 	25, 1, 27, 2, 3, 8, 2, 4,
   10750 	5, 2, 6, 2, 2, 6, 8, 2,
   10751 	11, 9, 2, 13, 15, 2, 14, 15,
   10752 	2, 18, 1, 2, 19, 27, 2, 20,
   10753 	9, 2, 22, 27, 2, 23, 27, 2,
   10754 	24, 27, 2, 26, 27, 3, 14, 11,
   10755 	9
   10756 };
   10757 
         /* Indexed by state: offset of that state's keys within _json_trans_keys. */
   10758 static const unsigned char _json_key_offsets[] = {
   10759 	0, 0, 4, 9, 14, 15, 19, 24,
   10760 	29, 34, 38, 42, 45, 48, 50, 54,
   10761 	58, 60, 62, 67, 69, 71, 80, 86,
   10762 	92, 98, 104, 106, 115, 116, 116, 116,
   10763 	121, 126, 131, 132, 133, 134, 135, 135,
   10764 	136, 137, 138, 138, 139, 140, 141, 141,
   10765 	146, 151, 152, 156, 161, 166, 171, 175,
   10766 	175, 178, 178, 178
   10767 };
   10768 
         /* Input bytes each state matches on.  For one state: first the single
          * keys (binary-searched), then (low, high) pairs for the range keys. */
   10769 static const char _json_trans_keys[] = {
   10770 	32, 123, 9, 13, 32, 34, 125, 9,
   10771 	13, 32, 34, 125, 9, 13, 34, 32,
   10772 	58, 9, 13, 32, 93, 125, 9, 13,
   10773 	32, 44, 125, 9, 13, 32, 44, 125,
   10774 	9, 13, 32, 34, 9, 13, 45, 48,
   10775 	49, 57, 48, 49, 57, 46, 69, 101,
   10776 	48, 57, 69, 101, 48, 57, 43, 45,
   10777 	48, 57, 48, 57, 48, 57, 46, 69,
   10778 	101, 48, 57, 34, 92, 34, 92, 34,
   10779 	47, 92, 98, 102, 110, 114, 116, 117,
   10780 	48, 57, 65, 70, 97, 102, 48, 57,
   10781 	65, 70, 97, 102, 48, 57, 65, 70,
   10782 	97, 102, 48, 57, 65, 70, 97, 102,
   10783 	34, 92, 34, 45, 91, 102, 110, 116,
   10784 	123, 48, 57, 34, 32, 93, 125, 9,
   10785 	13, 32, 44, 93, 9, 13, 32, 93,
   10786 	125, 9, 13, 97, 108, 115, 101, 117,
   10787 	108, 108, 114, 117, 101, 32, 34, 125,
   10788 	9, 13, 32, 34, 125, 9, 13, 34,
   10789 	32, 58, 9, 13, 32, 93, 125, 9,
   10790 	13, 32, 44, 125, 9, 13, 32, 44,
   10791 	125, 9, 13, 32, 34, 9, 13, 32,
   10792 	9, 13, 0
   10793 };
   10794 
         /* Indexed by state: number of single keys the state has. */
   10795 static const char _json_single_lengths[] = {
   10796 	0, 2, 3, 3, 1, 2, 3, 3,
   10797 	3, 2, 2, 1, 3, 0, 2, 2,
   10798 	0, 0, 3, 2, 2, 9, 0, 0,
   10799 	0, 0, 2, 7, 1, 0, 0, 3,
   10800 	3, 3, 1, 1, 1, 1, 0, 1,
   10801 	1, 1, 0, 1, 1, 1, 0, 3,
   10802 	3, 1, 2, 3, 3, 3, 2, 0,
   10803 	1, 0, 0, 0
   10804 };
   10805 
         /* Indexed by state: number of (low, high) range keys the state has. */
   10806 static const char _json_range_lengths[] = {
   10807 	0, 1, 1, 1, 0, 1, 1, 1,
   10808 	1, 1, 1, 1, 0, 1, 1, 1,
   10809 	1, 1, 1, 0, 0, 0, 3, 3,
   10810 	3, 3, 0, 1, 0, 0, 0, 1,
   10811 	1, 1, 0, 0, 0, 0, 0, 0,
   10812 	0, 0, 0, 0, 0, 0, 0, 1,
   10813 	1, 0, 1, 1, 1, 1, 1, 0,
   10814 	1, 0, 0, 0
   10815 };
   10816 
         /* Indexed by state: base position of that state within _json_indicies.
          * The matched key's position (or the total key count when nothing
          * matched) is added to this base. */
   10817 static const short _json_index_offsets[] = {
   10818 	0, 0, 4, 9, 14, 16, 20, 25,
   10819 	30, 35, 39, 43, 46, 50, 52, 56,
   10820 	60, 62, 64, 69, 72, 75, 85, 89,
   10821 	93, 97, 101, 104, 113, 115, 116, 117,
   10822 	122, 127, 132, 134, 136, 138, 140, 141,
   10823 	143, 145, 147, 148, 150, 152, 154, 155,
   10824 	160, 165, 167, 171, 176, 181, 186, 190,
   10825 	191, 194, 195, 196
   10826 };
   10827 
         /* Maps a (state, key) position to a transition number, which indexes
          * _json_trans_targs and _json_trans_actions. */
   10828 static const char _json_indicies[] = {
   10829 	0, 2, 0, 1, 3, 4, 5, 3,
   10830 	1, 6, 7, 8, 6, 1, 9, 1,
   10831 	10, 11, 10, 1, 11, 1, 1, 11,
   10832 	12, 13, 14, 15, 13, 1, 16, 17,
   10833 	8, 16, 1, 17, 7, 17, 1, 18,
   10834 	19, 20, 1, 19, 20, 1, 22, 23,
   10835 	23, 21, 24, 1, 23, 23, 24, 21,
   10836 	25, 25, 26, 1, 26, 1, 26, 21,
   10837 	22, 23, 23, 20, 21, 28, 29, 27,
   10838 	31, 32, 30, 33, 33, 33, 33, 33,
   10839 	33, 33, 33, 34, 1, 35, 35, 35,
   10840 	1, 36, 36, 36, 1, 37, 37, 37,
   10841 	1, 38, 38, 38, 1, 40, 41, 39,
   10842 	42, 43, 44, 45, 46, 47, 48, 43,
   10843 	1, 49, 1, 50, 51, 53, 54, 1,
   10844 	53, 52, 55, 56, 54, 55, 1, 56,
   10845 	1, 1, 56, 52, 57, 1, 58, 1,
   10846 	59, 1, 60, 1, 61, 62, 1, 63,
   10847 	1, 64, 1, 65, 66, 1, 67, 1,
   10848 	68, 1, 69, 70, 71, 72, 70, 1,
   10849 	73, 74, 75, 73, 1, 76, 1, 77,
   10850 	78, 77, 1, 78, 1, 1, 78, 79,
   10851 	80, 81, 82, 80, 1, 83, 84, 75,
   10852 	83, 1, 84, 74, 84, 1, 85, 86,
   10853 	86, 1, 1, 1, 1, 0
   10854 };
   10855 
         /* Indexed by transition number: the state to move to.  parse() stops as
          * soon as the current state becomes 0. */
   10856 static const char _json_trans_targs[] = {
   10857 	1, 0, 2, 3, 4, 56, 3, 4,
   10858 	56, 5, 5, 6, 7, 8, 9, 56,
   10859 	8, 9, 11, 12, 18, 57, 13, 15,
   10860 	14, 16, 17, 20, 58, 21, 20, 58,
   10861 	21, 19, 22, 23, 24, 25, 26, 20,
   10862 	58, 21, 28, 30, 31, 34, 39, 43,
   10863 	47, 29, 59, 59, 32, 31, 29, 32,
   10864 	33, 35, 36, 37, 38, 59, 40, 41,
   10865 	42, 59, 44, 45, 46, 59, 48, 49,
   10866 	55, 48, 49, 55, 50, 50, 51, 52,
   10867 	53, 54, 55, 53, 54, 59, 56
   10868 };
   10869 
         /* Indexed by transition number: offset into _json_actions of the action
          * list to run, or 0 when the transition has no actions. */
   10870 static const char _json_trans_actions[] = {
   10871 	0, 0, 0, 21, 77, 53, 0, 47,
   10872 	23, 17, 0, 0, 15, 19, 19, 50,
   10873 	0, 0, 0, 0, 0, 1, 0, 0,
   10874 	0, 0, 0, 3, 13, 0, 0, 35,
   10875 	5, 11, 0, 38, 7, 7, 7, 41,
   10876 	44, 9, 62, 56, 25, 0, 0, 0,
   10877 	31, 29, 33, 59, 15, 0, 27, 0,
   10878 	0, 0, 0, 0, 0, 68, 0, 0,
   10879 	0, 71, 0, 0, 0, 65, 21, 77,
   10880 	53, 0, 47, 23, 17, 0, 0, 15,
   10881 	19, 19, 50, 0, 0, 74, 0
   10882 };
   10883 
         /* Initial state; json_parser_reset() stores it as the current state. */
   10884 static const int json_start = 1;
   10885 
         /* Entry states of the individual machines.  The actions in parse() that
          * enter the number, string and value machines use the same state numbers
          * as literals (10, 19 and 27). */
   10886 static const int json_en_number_machine = 10;
   10887 static const int json_en_string_machine = 19;
   10888 static const int json_en_value_machine = 27;
   10889 static const int json_en_main = 1;
   10890 
   10891 
   10892 #line 1221 "upb/json/parser.rl"
   10893 
   10894 size_t parse(void *closure, const void *hd, const char *buf, size_t size,
   10895              const upb_bufhandle *handle) {
   10896   upb_json_parser *parser = closure;
   10897 
   10898   /* Variables used by Ragel's generated code. */
   10899   int cs = parser->current_state;
   10900   int *stack = parser->parser_stack;
   10901   int top = parser->parser_top;
   10902 
   10903   const char *p = buf;
   10904   const char *pe = buf + size;
   10905 
   10906   parser->handle = handle;
   10907 
   10908   UPB_UNUSED(hd);
   10909   UPB_UNUSED(handle);
   10910 
   10911   capture_resume(parser, buf);
   10912 
   10913 
   10914 #line 1301 "upb/json/parser.c"
   10915 	{
   10916 	int _klen;
   10917 	unsigned int _trans;
   10918 	const char *_acts;
   10919 	unsigned int _nacts;
   10920 	const char *_keys;
   10921 
   10922 	if ( p == pe )
   10923 		goto _test_eof;
   10924 	if ( cs == 0 )
   10925 		goto _out;
   10926 _resume:
   10927 	_keys = _json_trans_keys + _json_key_offsets[cs];
   10928 	_trans = _json_index_offsets[cs];
   10929 
   10930 	_klen = _json_single_lengths[cs];
   10931 	if ( _klen > 0 ) {
   10932 		const char *_lower = _keys;
   10933 		const char *_mid;
   10934 		const char *_upper = _keys + _klen - 1;
   10935 		while (1) {
   10936 			if ( _upper < _lower )
   10937 				break;
   10938 
   10939 			_mid = _lower + ((_upper-_lower) >> 1);
   10940 			if ( (*p) < *_mid )
   10941 				_upper = _mid - 1;
   10942 			else if ( (*p) > *_mid )
   10943 				_lower = _mid + 1;
   10944 			else {
   10945 				_trans += (unsigned int)(_mid - _keys);
   10946 				goto _match;
   10947 			}
   10948 		}
   10949 		_keys += _klen;
   10950 		_trans += _klen;
   10951 	}
   10952 
   10953 	_klen = _json_range_lengths[cs];
   10954 	if ( _klen > 0 ) {
   10955 		const char *_lower = _keys;
   10956 		const char *_mid;
   10957 		const char *_upper = _keys + (_klen<<1) - 2;
   10958 		while (1) {
   10959 			if ( _upper < _lower )
   10960 				break;
   10961 
   10962 			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
   10963 			if ( (*p) < _mid[0] )
   10964 				_upper = _mid - 2;
   10965 			else if ( (*p) > _mid[1] )
   10966 				_lower = _mid + 2;
   10967 			else {
   10968 				_trans += (unsigned int)((_mid - _keys)>>1);
   10969 				goto _match;
   10970 			}
   10971 		}
   10972 		_trans += _klen;
   10973 	}
   10974 
   10975 _match:
   10976 	_trans = _json_indicies[_trans];
   10977 	cs = _json_trans_targs[_trans];
   10978 
   10979 	if ( _json_trans_actions[_trans] == 0 )
   10980 		goto _again;
   10981 
   10982 	_acts = _json_actions + _json_trans_actions[_trans];
   10983 	_nacts = (unsigned int) *_acts++;
   10984 	while ( _nacts-- > 0 )
   10985 	{
   10986 		switch ( *_acts++ )
   10987 		{
   10988 	case 0:
   10989 #line 1133 "upb/json/parser.rl"
   10990 	{ p--; {cs = stack[--top]; goto _again;} }
   10991 	break;
   10992 	case 1:
   10993 #line 1134 "upb/json/parser.rl"
   10994 	{ p--; {stack[top++] = cs; cs = 10; goto _again;} }
   10995 	break;
   10996 	case 2:
   10997 #line 1138 "upb/json/parser.rl"
   10998 	{ start_text(parser, p); }
   10999 	break;
   11000 	case 3:
   11001 #line 1139 "upb/json/parser.rl"
   11002 	{ CHECK_RETURN_TOP(end_text(parser, p)); }
   11003 	break;
   11004 	case 4:
   11005 #line 1145 "upb/json/parser.rl"
   11006 	{ start_hex(parser); }
   11007 	break;
   11008 	case 5:
   11009 #line 1146 "upb/json/parser.rl"
   11010 	{ hexdigit(parser, p); }
   11011 	break;
   11012 	case 6:
   11013 #line 1147 "upb/json/parser.rl"
   11014 	{ CHECK_RETURN_TOP(end_hex(parser)); }
   11015 	break;
   11016 	case 7:
   11017 #line 1153 "upb/json/parser.rl"
   11018 	{ CHECK_RETURN_TOP(escape(parser, p)); }
   11019 	break;
   11020 	case 8:
   11021 #line 1159 "upb/json/parser.rl"
   11022 	{ p--; {cs = stack[--top]; goto _again;} }
   11023 	break;
   11024 	case 9:
   11025 #line 1162 "upb/json/parser.rl"
   11026 	{ {stack[top++] = cs; cs = 19; goto _again;} }
   11027 	break;
   11028 	case 10:
   11029 #line 1164 "upb/json/parser.rl"
   11030 	{ p--; {stack[top++] = cs; cs = 27; goto _again;} }
   11031 	break;
   11032 	case 11:
   11033 #line 1169 "upb/json/parser.rl"
   11034 	{ start_member(parser); }
   11035 	break;
   11036 	case 12:
   11037 #line 1170 "upb/json/parser.rl"
   11038 	{ CHECK_RETURN_TOP(end_membername(parser)); }
   11039 	break;
   11040 	case 13:
   11041 #line 1173 "upb/json/parser.rl"
   11042 	{ end_member(parser); }
   11043 	break;
   11044 	case 14:
   11045 #line 1179 "upb/json/parser.rl"
   11046 	{ start_object(parser); }
   11047 	break;
   11048 	case 15:
   11049 #line 1182 "upb/json/parser.rl"
   11050 	{ end_object(parser); }
   11051 	break;
   11052 	case 16:
   11053 #line 1188 "upb/json/parser.rl"
   11054 	{ CHECK_RETURN_TOP(start_array(parser)); }
   11055 	break;
   11056 	case 17:
   11057 #line 1192 "upb/json/parser.rl"
   11058 	{ end_array(parser); }
   11059 	break;
   11060 	case 18:
   11061 #line 1197 "upb/json/parser.rl"
   11062 	{ start_number(parser, p); }
   11063 	break;
   11064 	case 19:
   11065 #line 1198 "upb/json/parser.rl"
   11066 	{ CHECK_RETURN_TOP(end_number(parser, p)); }
   11067 	break;
   11068 	case 20:
   11069 #line 1200 "upb/json/parser.rl"
   11070 	{ CHECK_RETURN_TOP(start_stringval(parser)); }
   11071 	break;
   11072 	case 21:
   11073 #line 1201 "upb/json/parser.rl"
   11074 	{ CHECK_RETURN_TOP(end_stringval(parser)); }
   11075 	break;
   11076 	case 22:
   11077 #line 1203 "upb/json/parser.rl"
   11078 	{ CHECK_RETURN_TOP(parser_putbool(parser, true)); }
   11079 	break;
   11080 	case 23:
   11081 #line 1205 "upb/json/parser.rl"
   11082 	{ CHECK_RETURN_TOP(parser_putbool(parser, false)); }
   11083 	break;
   11084 	case 24:
   11085 #line 1207 "upb/json/parser.rl"
   11086 	{ /* null value */ }
   11087 	break;
   11088 	case 25:
   11089 #line 1209 "upb/json/parser.rl"
   11090 	{ CHECK_RETURN_TOP(start_subobject(parser)); }
   11091 	break;
   11092 	case 26:
   11093 #line 1210 "upb/json/parser.rl"
   11094 	{ end_subobject(parser); }
   11095 	break;
   11096 	case 27:
   11097 #line 1215 "upb/json/parser.rl"
   11098 	{ p--; {cs = stack[--top]; goto _again;} }
   11099 	break;
   11100 #line 1487 "upb/json/parser.c"
   11101 		}
   11102 	}
   11103 
   11104 _again:
   11105 	if ( cs == 0 )
   11106 		goto _out;
   11107 	if ( ++p != pe )
   11108 		goto _resume;
   11109 	_test_eof: {}
   11110 	_out: {}
   11111 	}
   11112 
   11113 #line 1242 "upb/json/parser.rl"
   11114 
   11115   if (p != pe) {
   11116     upb_status_seterrf(&parser->status, "Parse error at %s\n", p);
   11117     upb_env_reporterror(parser->env, &parser->status);
   11118   } else {
   11119     capture_suspend(parser, &p);
   11120   }
   11121 
   11122 error:
   11123   /* Save parsing state back to parser. */
   11124   parser->current_state = cs;
   11125   parser->parser_top = top;
   11126 
   11127   return p - buf;
   11128 }
   11129 
   11130 bool end(void *closure, const void *hd) {
   11131   UPB_UNUSED(closure);
   11132   UPB_UNUSED(hd);
   11133 
   11134   /* Prevent compile warning on unused static constants. */
   11135   UPB_UNUSED(json_start);
   11136   UPB_UNUSED(json_en_number_machine);
   11137   UPB_UNUSED(json_en_string_machine);
   11138   UPB_UNUSED(json_en_value_machine);
   11139   UPB_UNUSED(json_en_main);
   11140   return true;
   11141 }
   11142 
   11143 static void json_parser_reset(upb_json_parser *p) {
   11144   int cs;
   11145   int top;
   11146 
   11147   p->top = p->stack;
   11148   p->top->f = NULL;
   11149   p->top->is_map = false;
   11150   p->top->is_mapentry = false;
   11151 
   11152   /* Emit Ragel initialization of the parser. */
   11153 
   11154 #line 1541 "upb/json/parser.c"
   11155 	{
   11156 	cs = json_start;
   11157 	top = 0;
   11158 	}
   11159 
   11160 #line 1282 "upb/json/parser.rl"
   11161   p->current_state = cs;
   11162   p->parser_top = top;
   11163   accumulate_clear(p);
   11164   p->multipart_state = MULTIPART_INACTIVE;
   11165   p->capture = NULL;
   11166   p->accumulated = NULL;
   11167   upb_status_clear(&p->status);
   11168 }
   11169 
   11170 
   11171 /* Public API *****************************************************************/
   11172 
   11173 upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) {
   11174 #ifndef NDEBUG
   11175   const size_t size_before = upb_env_bytesallocated(env);
   11176 #endif
   11177   upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser));
   11178   if (!p) return false;
   11179 
   11180   p->env = env;
   11181   p->limit = p->stack + UPB_JSON_MAX_DEPTH;
   11182   p->accumulate_buf = NULL;
   11183   p->accumulate_buf_size = 0;
   11184   upb_byteshandler_init(&p->input_handler_);
   11185   upb_byteshandler_setstring(&p->input_handler_, parse, NULL);
   11186   upb_byteshandler_setendstr(&p->input_handler_, end, NULL);
   11187   upb_bytessink_reset(&p->input_, &p->input_handler_, p);
   11188 
   11189   json_parser_reset(p);
   11190   upb_sink_reset(&p->top->sink, output->handlers, output->closure);
   11191   p->top->m = upb_handlers_msgdef(output->handlers);
   11192 
   11193   /* If this fails, uncomment and increase the value in parser.h. */
   11194   /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */
   11195   assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE);
   11196   return p;
   11197 }
   11198 
   11199 upb_bytessink *upb_json_parser_input(upb_json_parser *p) {
   11200   return &p->input_;
   11201 }
   11202 /*
   11203 ** This currently uses snprintf() to format primitives, and could be optimized
   11204 ** further.
   11205 */
   11206 
   11207 
   11208 #include <stdlib.h>
   11209 #include <stdio.h>
   11210 #include <string.h>
   11211 #include <stdint.h>
   11212 
         /* State of the JSON printer. */
   11213 struct upb_json_printer {
           /* NOTE(review): presumably the sink through which message events reach
            * the printer; its use is not visible in this part of the file. */
   11214   upb_sink input_;
   11215   /* BytesSink closure; passed to upb_bytessink_putbuf() by print_data(). */
   11216   void *subc_;
           /* Bytes sink that print_data() writes the generated JSON text to. */
   11217   upb_bytessink *output_;
   11218 
   11219   /* We track the depth so that we know when to emit startstr/endstr on the
   11220    * output.  It is also the index of the current frame in first_elem_. */
   11221   int depth_;
   11222 
   11223   /* Have we emitted the first element? This state is necessary to emit commas
   11224    * without leaving a trailing comma in arrays/maps. We keep this state per
   11225    * frame depth.
   11226    *
   11227    * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
   11228    * We count frames (contexts in which we separate elements by commas) as both
   11229    * repeated fields and messages (maps), and the worst case is a
   11230    * message->repeated field->submessage->repeated field->... nesting. */
   11231   bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
   11232 };
   11233 
   11234 /* StringPiece; a pointer plus a length.  The struct only refers to the
          * bytes: newstrpc() points it at a field name without copying it, and
          * putkey() reads exactly `len` bytes from `ptr`. */
   11235 typedef struct {
   11236   const char *ptr;
   11237   size_t len;
   11238 } strpc;
   11239 
   11240 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) {
   11241   strpc *ret = malloc(sizeof(*ret));
   11242   ret->ptr = upb_fielddef_name(f);
   11243   ret->len = strlen(ret->ptr);
   11244   upb_handlers_addcleanup(h, ret, free);
   11245   return ret;
   11246 }
   11247 
   11248 /* ------------ JSON string printing: values, maps, arrays ------------------ */
   11249 
   11250 static void print_data(
   11251     upb_json_printer *p, const char *buf, unsigned int len) {
   11252   /* TODO: Will need to change if we support pushback from the sink. */
   11253   size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
   11254   UPB_ASSERT_VAR(n, n == len);
   11255 }
   11256 
   11257 static void print_comma(upb_json_printer *p) {
   11258   if (!p->first_elem_[p->depth_]) {
   11259     print_data(p, ",", 1);
   11260   }
   11261   p->first_elem_[p->depth_] = false;
   11262 }
   11263 
   11264 /* Helpers that print properly formatted elements to the JSON output stream. */
   11265 
   11266 /* Used for escaping control chars in strings: is_json_escaped() treats every
          * byte below this value (0x20) as needing an escape. */
   11267 static const char kControlCharLimit = 0x20;
   11268 
   11269 UPB_INLINE bool is_json_escaped(char c) {
   11270   /* See RFC 4627. */
   11271   unsigned char uc = (unsigned char)c;
   11272   return uc < kControlCharLimit || uc == '"' || uc == '\\';
   11273 }
   11274 
   11275 UPB_INLINE char* json_nice_escape(char c) {
   11276   switch (c) {
   11277     case '"':  return "\\\"";
   11278     case '\\': return "\\\\";
   11279     case '\b': return "\\b";
   11280     case '\f': return "\\f";
   11281     case '\n': return "\\n";
   11282     case '\r': return "\\r";
   11283     case '\t': return "\\t";
   11284     default:   return NULL;
   11285   }
   11286 }
   11287 
   11288 /* Write a properly escaped string chunk. The surrounding quotes are *not*
   11289  * printed; this is so that the caller has the option of emitting the string
   11290  * content in chunks. */
   11291 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) {
   11292   const char* unescaped_run = NULL;
   11293   unsigned int i;
   11294   for (i = 0; i < len; i++) {
   11295     char c = buf[i];
   11296     /* Handle escaping. */
   11297     if (is_json_escaped(c)) {
   11298       /* Use a "nice" escape, like \n, if one exists for this character. */
   11299       const char* escape = json_nice_escape(c);
   11300       /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
   11301        * escape. */
   11302       char escape_buf[8];
   11303       if (!escape) {
   11304         unsigned char byte = (unsigned char)c;
   11305         _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
   11306         escape = escape_buf;
   11307       }
   11308 
   11309       /* N.B. that we assume that the input encoding is equal to the output
   11310        * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
   11311        * can simply pass the bytes through. */
   11312 
   11313       /* If there's a current run of unescaped chars, print that run first. */
   11314       if (unescaped_run) {
   11315         print_data(p, unescaped_run, &buf[i] - unescaped_run);
   11316         unescaped_run = NULL;
   11317       }
   11318       /* Then print the escape code. */
   11319       print_data(p, escape, strlen(escape));
   11320     } else {
   11321       /* Add to the current unescaped run of characters. */
   11322       if (unescaped_run == NULL) {
   11323         unescaped_run = &buf[i];
   11324       }
   11325     }
   11326   }
   11327 
   11328   /* If the string ended in a run of unescaped characters, print that last run. */
   11329   if (unescaped_run) {
   11330     print_data(p, unescaped_run, &buf[len] - unescaped_run);
   11331   }
   11332 }
   11333 
         /* Returns -1 from the enclosing function when x is false.  The fmt_*
          * helpers below return size_t, so there the value becomes (size_t)-1,
          * which is what CHKFMT tests for.  The expansion already ends in ';'. */
   11334 #define CHKLENGTH(x) if (!(x)) return -1;
   11335 
   11336 /* Helpers that format floating point values according to our custom formats.
   11337  * Right now we use %.8g and %.17g for float/double, respectively, to match
   11338  * proto2::util::JsonFormat's defaults.  May want to change this later. */
   11339 
   11340 static size_t fmt_double(double val, char* buf, size_t length) {
   11341   size_t n = _upb_snprintf(buf, length, "%.17g", val);
   11342   CHKLENGTH(n > 0 && n < length);
   11343   return n;
   11344 }
   11345 
   11346 static size_t fmt_float(float val, char* buf, size_t length) {
   11347   size_t n = _upb_snprintf(buf, length, "%.8g", val);
   11348   CHKLENGTH(n > 0 && n < length);
   11349   return n;
   11350 }
   11351 
   11352 static size_t fmt_bool(bool val, char* buf, size_t length) {
   11353   size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false"));
   11354   CHKLENGTH(n > 0 && n < length);
   11355   return n;
   11356 }
   11357 
   11358 static size_t fmt_int64(long val, char* buf, size_t length) {
   11359   size_t n = _upb_snprintf(buf, length, "%ld", val);
   11360   CHKLENGTH(n > 0 && n < length);
   11361   return n;
   11362 }
   11363 
   11364 static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) {
   11365   size_t n = _upb_snprintf(buf, length, "%llu", val);
   11366   CHKLENGTH(n > 0 && n < length);
   11367   return n;
   11368 }
   11369 
   11370 /* Print a map key given a field name. Called by scalar field handlers and by
   11371  * startseq for repeated fields. */
   11372 static bool putkey(void *closure, const void *handler_data) {
   11373   upb_json_printer *p = closure;
   11374   const strpc *key = handler_data;
   11375   print_comma(p);
   11376   print_data(p, "\"", 1);
   11377   putstring(p, key->ptr, key->len);
   11378   print_data(p, "\":", 2);
   11379   return true;
   11380 }
   11381 
         /* Early-return helpers for handlers that return bool.
          * CHKFMT: returns false if `val` is (size_t)-1, the failure value of the
          *         fmt_* helpers.
          * CHK:    returns false if `val` is false.
          * Both expansions already end in ';'. */
   11382 #define CHKFMT(val) if ((val) == (size_t)-1) return false;
   11383 #define CHK(val)    if (!(val)) return false;
   11384 
   11385 #define TYPE_HANDLERS(type, fmt_func)                                        \
   11386   static bool put##type(void *closure, const void *handler_data, type val) { \
   11387     upb_json_printer *p = closure;                                           \
   11388     char data[64];                                                           \
   11389     size_t length = fmt_func(val, data, sizeof(data));                       \
   11390     UPB_UNUSED(handler_data);                                                \
   11391     CHKFMT(length);                                                          \
   11392     print_data(p, data, length);                                             \
   11393     return true;                                                             \
   11394   }                                                                          \
   11395   static bool scalar_##type(void *closure, const void *handler_data,         \
   11396                             type val) {                                      \
   11397     CHK(putkey(closure, handler_data));                                      \
   11398     CHK(put##type(closure, handler_data, val));                              \
   11399     return true;                                                             \
   11400   }                                                                          \
   11401   static bool repeated_##type(void *closure, const void *handler_data,       \
   11402                               type val) {                                    \
   11403     upb_json_printer *p = closure;                                           \
   11404     print_comma(p);                                                          \
   11405     CHK(put##type(closure, handler_data, val));                              \
   11406     return true;                                                             \
   11407   }
   11408 
   11409 #define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
   11410   static bool putmapkey_##type(void *closure, const void *handler_data,      \
   11411                             type val) {                                      \
   11412     upb_json_printer *p = closure;                                           \
   11413     print_data(p, "\"", 1);                                                  \
   11414     CHK(put##type(closure, handler_data, val));                              \
   11415     print_data(p, "\":", 2);                                                 \
   11416     return true;                                                             \
   11417   }
   11418 
         /* Instantiate put<type>, scalar_<type> and repeated_<type> for every
          * primitive type.  All signed types and uint32_t share fmt_int64. */
   11419 TYPE_HANDLERS(double,   fmt_double)
   11420 TYPE_HANDLERS(float,    fmt_float)
   11421 TYPE_HANDLERS(bool,     fmt_bool)
   11422 TYPE_HANDLERS(int32_t,  fmt_int64)
   11423 TYPE_HANDLERS(uint32_t, fmt_int64)
   11424 TYPE_HANDLERS(int64_t,  fmt_int64)
   11425 TYPE_HANDLERS(uint64_t, fmt_uint64)
   11426 
   11427 /* double and float are not allowed to be map keys. */
   11428 TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
   11429 TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64)
   11430 TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64)
   11431 TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64)
   11432 TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64)
   11433 
         /* The generator macros are not needed past this point. */
   11434 #undef TYPE_HANDLERS
   11435 #undef TYPE_HANDLERS_MAPKEY
   11436 
         /* Handler data for the enum handlers. */
   11437 typedef struct {
           /* Passed to putkey() as its handler data by scalar_enum(). */
   11438   void *keyname;
           /* Used to look up the symbolic name of a value (upb_enumdef_iton). */
   11439   const upb_enumdef *enumdef;
   11440 } EnumHandlerData;
   11441 
   11442 static bool scalar_enum(void *closure, const void *handler_data,
   11443                         int32_t val) {
   11444   const EnumHandlerData *hd = handler_data;
   11445   upb_json_printer *p = closure;
   11446   const char *symbolic_name;
   11447 
   11448   CHK(putkey(closure, hd->keyname));
   11449 
   11450   symbolic_name = upb_enumdef_iton(hd->enumdef, val);
   11451   if (symbolic_name) {
   11452     print_data(p, "\"", 1);
   11453     putstring(p, symbolic_name, strlen(symbolic_name));
   11454     print_data(p, "\"", 1);
   11455   } else {
   11456     putint32_t(closure, NULL, val);
   11457   }
   11458 
   11459   return true;
   11460 }
   11461 
   11462 static void print_enum_symbolic_name(upb_json_printer *p,
   11463                                      const upb_enumdef *def,
   11464                                      int32_t val) {
   11465   const char *symbolic_name = upb_enumdef_iton(def, val);
   11466   if (symbolic_name) {
   11467     print_data(p, "\"", 1);
   11468     putstring(p, symbolic_name, strlen(symbolic_name));
   11469     print_data(p, "\"", 1);
   11470   } else {
   11471     putint32_t(p, NULL, val);
   11472   }
   11473 }
   11474 
   11475 static bool repeated_enum(void *closure, const void *handler_data,
   11476                           int32_t val) {
   11477   const EnumHandlerData *hd = handler_data;
   11478   upb_json_printer *p = closure;
   11479   print_comma(p);
   11480 
   11481   print_enum_symbolic_name(p, hd->enumdef, val);
   11482 
   11483   return true;
   11484 }
   11485 
   11486 static bool mapvalue_enum(void *closure, const void *handler_data,
   11487                           int32_t val) {
   11488   const EnumHandlerData *hd = handler_data;
   11489   upb_json_printer *p = closure;
   11490 
   11491   print_enum_symbolic_name(p, hd->enumdef, val);
   11492 
   11493   return true;
   11494 }
   11495 
   11496 static void *scalar_startsubmsg(void *closure, const void *handler_data) {
   11497   return putkey(closure, handler_data) ? closure : UPB_BREAK;
   11498 }
   11499 
   11500 static void *repeated_startsubmsg(void *closure, const void *handler_data) {
   11501   upb_json_printer *p = closure;
   11502   UPB_UNUSED(handler_data);
   11503   print_comma(p);
   11504   return closure;
   11505 }
   11506 
   11507 static void start_frame(upb_json_printer *p) {
   11508   p->depth_++;
   11509   p->first_elem_[p->depth_] = true;
   11510   print_data(p, "{", 1);
   11511 }
   11512 
   11513 static void end_frame(upb_json_printer *p) {
   11514   print_data(p, "}", 1);
   11515   p->depth_--;
   11516 }
   11517 
   11518 static bool printer_startmsg(void *closure, const void *handler_data) {
   11519   upb_json_printer *p = closure;
   11520   UPB_UNUSED(handler_data);
   11521   if (p->depth_ == 0) {
   11522     upb_bytessink_start(p->output_, 0, &p->subc_);
   11523   }
   11524   start_frame(p);
   11525   return true;
   11526 }
   11527 
   11528 static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
   11529   upb_json_printer *p = closure;
   11530   UPB_UNUSED(handler_data);
   11531   UPB_UNUSED(s);
   11532   end_frame(p);
   11533   if (p->depth_ == 0) {
   11534     upb_bytessink_end(p->output_);
   11535   }
   11536   return true;
   11537 }
   11538 
   11539 static void *startseq(void *closure, const void *handler_data) {
   11540   upb_json_printer *p = closure;
   11541   CHK(putkey(closure, handler_data));
   11542   p->depth_++;
   11543   p->first_elem_[p->depth_] = true;
   11544   print_data(p, "[", 1);
   11545   return closure;
   11546 }
   11547 
   11548 static bool endseq(void *closure, const void *handler_data) {
   11549   upb_json_printer *p = closure;
   11550   UPB_UNUSED(handler_data);
   11551   print_data(p, "]", 1);
   11552   p->depth_--;
   11553   return true;
   11554 }
   11555 
   11556 static void *startmap(void *closure, const void *handler_data) {
   11557   upb_json_printer *p = closure;
   11558   CHK(putkey(closure, handler_data));
   11559   p->depth_++;
   11560   p->first_elem_[p->depth_] = true;
   11561   print_data(p, "{", 1);
   11562   return closure;
   11563 }
   11564 
   11565 static bool endmap(void *closure, const void *handler_data) {
   11566   upb_json_printer *p = closure;
   11567   UPB_UNUSED(handler_data);
   11568   print_data(p, "}", 1);
   11569   p->depth_--;
   11570   return true;
   11571 }
   11572 
   11573 static size_t putstr(void *closure, const void *handler_data, const char *str,
   11574                      size_t len, const upb_bufhandle *handle) {
   11575   upb_json_printer *p = closure;
   11576   UPB_UNUSED(handler_data);
   11577   UPB_UNUSED(handle);
   11578   putstring(p, str, len);
   11579   return len;
   11580 }
   11581 
   11582 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
   11583 static size_t putbytes(void *closure, const void *handler_data, const char *str,
   11584                        size_t len, const upb_bufhandle *handle) {
   11585   upb_json_printer *p = closure;
   11586 
   11587   /* This is the regular base64, not the "web-safe" version. */
   11588   static const char base64[] =
   11589       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
   11590 
   11591   /* Base64-encode. */
   11592   char data[16000];
   11593   const char *limit = data + sizeof(data);
   11594   const unsigned char *from = (const unsigned char*)str;
   11595   char *to = data;
   11596   size_t remaining = len;
   11597   size_t bytes;
   11598 
   11599   UPB_UNUSED(handler_data);
   11600   UPB_UNUSED(handle);
   11601 
   11602   while (remaining > 2) {
   11603     /* TODO(haberman): handle encoded lengths > sizeof(data) */
   11604     UPB_ASSERT_VAR(limit, (limit - to) >= 4);
   11605 
   11606     to[0] = base64[from[0] >> 2];
   11607     to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
   11608     to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
   11609     to[3] = base64[from[2] & 0x3f];
   11610 
   11611     remaining -= 3;
   11612     to += 4;
   11613     from += 3;
   11614   }
   11615 
   11616   switch (remaining) {
   11617     case 2:
   11618       to[0] = base64[from[0] >> 2];
   11619       to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
   11620       to[2] = base64[(from[1] & 0xf) << 2];
   11621       to[3] = '=';
   11622       to += 4;
   11623       from += 2;
   11624       break;
   11625     case 1:
   11626       to[0] = base64[from[0] >> 2];
   11627       to[1] = base64[((from[0] & 0x3) << 4)];
   11628       to[2] = '=';
   11629       to[3] = '=';
   11630       to += 4;
   11631       from += 1;
   11632       break;
   11633   }
   11634 
   11635   bytes = to - data;
   11636   print_data(p, "\"", 1);
   11637   putstring(p, data, bytes);
   11638   print_data(p, "\"", 1);
   11639   return len;
   11640 }
   11641 
   11642 static void *scalar_startstr(void *closure, const void *handler_data,
   11643                              size_t size_hint) {
   11644   upb_json_printer *p = closure;
   11645   UPB_UNUSED(handler_data);
   11646   UPB_UNUSED(size_hint);
   11647   CHK(putkey(closure, handler_data));
   11648   print_data(p, "\"", 1);
   11649   return p;
   11650 }
   11651 
   11652 static size_t scalar_str(void *closure, const void *handler_data,
   11653                          const char *str, size_t len,
   11654                          const upb_bufhandle *handle) {
   11655   CHK(putstr(closure, handler_data, str, len, handle));
   11656   return len;
   11657 }
   11658 
   11659 static bool scalar_endstr(void *closure, const void *handler_data) {
   11660   upb_json_printer *p = closure;
   11661   UPB_UNUSED(handler_data);
   11662   print_data(p, "\"", 1);
   11663   return true;
   11664 }
   11665 
   11666 static void *repeated_startstr(void *closure, const void *handler_data,
   11667                                size_t size_hint) {
   11668   upb_json_printer *p = closure;
   11669   UPB_UNUSED(handler_data);
   11670   UPB_UNUSED(size_hint);
   11671   print_comma(p);
   11672   print_data(p, "\"", 1);
   11673   return p;
   11674 }
   11675 
   11676 static size_t repeated_str(void *closure, const void *handler_data,
   11677                            const char *str, size_t len,
   11678                            const upb_bufhandle *handle) {
   11679   CHK(putstr(closure, handler_data, str, len, handle));
   11680   return len;
   11681 }
   11682 
   11683 static bool repeated_endstr(void *closure, const void *handler_data) {
   11684   upb_json_printer *p = closure;
   11685   UPB_UNUSED(handler_data);
   11686   print_data(p, "\"", 1);
   11687   return true;
   11688 }
   11689 
   11690 static void *mapkeyval_startstr(void *closure, const void *handler_data,
   11691                                 size_t size_hint) {
   11692   upb_json_printer *p = closure;
   11693   UPB_UNUSED(handler_data);
   11694   UPB_UNUSED(size_hint);
   11695   print_data(p, "\"", 1);
   11696   return p;
   11697 }
   11698 
   11699 static size_t mapkey_str(void *closure, const void *handler_data,
   11700                          const char *str, size_t len,
   11701                          const upb_bufhandle *handle) {
   11702   CHK(putstr(closure, handler_data, str, len, handle));
   11703   return len;
   11704 }
   11705 
   11706 static bool mapkey_endstr(void *closure, const void *handler_data) {
   11707   upb_json_printer *p = closure;
   11708   UPB_UNUSED(handler_data);
   11709   print_data(p, "\":", 2);
   11710   return true;
   11711 }
   11712 
   11713 static bool mapvalue_endstr(void *closure, const void *handler_data) {
   11714   upb_json_printer *p = closure;
   11715   UPB_UNUSED(handler_data);
   11716   print_data(p, "\"", 1);
   11717   return true;
   11718 }
   11719 
   11720 static size_t scalar_bytes(void *closure, const void *handler_data,
   11721                            const char *str, size_t len,
   11722                            const upb_bufhandle *handle) {
   11723   CHK(putkey(closure, handler_data));
   11724   CHK(putbytes(closure, handler_data, str, len, handle));
   11725   return len;
   11726 }
   11727 
   11728 static size_t repeated_bytes(void *closure, const void *handler_data,
   11729                              const char *str, size_t len,
   11730                              const upb_bufhandle *handle) {
   11731   upb_json_printer *p = closure;
   11732   print_comma(p);
   11733   CHK(putbytes(closure, handler_data, str, len, handle));
   11734   return len;
   11735 }
   11736 
   11737 static size_t mapkey_bytes(void *closure, const void *handler_data,
   11738                            const char *str, size_t len,
   11739                            const upb_bufhandle *handle) {
   11740   upb_json_printer *p = closure;
   11741   CHK(putbytes(closure, handler_data, str, len, handle));
   11742   print_data(p, ":", 1);
   11743   return len;
   11744 }
   11745 
   11746 static void set_enum_hd(upb_handlers *h,
   11747                         const upb_fielddef *f,
   11748                         upb_handlerattr *attr) {
   11749   EnumHandlerData *hd = malloc(sizeof(EnumHandlerData));
   11750   hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f);
   11751   hd->keyname = newstrpc(h, f);
   11752   upb_handlers_addcleanup(h, hd, free);
   11753   upb_handlerattr_sethandlerdata(attr, hd);
   11754 }
   11755 
   11756 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
   11757  * in a map).
   11758  *
   11759  * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
   11760  * key or value cases properly. The right way to do this is to allocate a
   11761  * temporary structure at the start of a mapentry submessage, store key and
   11762  * value data in it as key and value handlers are called, and then print the
   11763  * key/value pair once at the end of the submessage. If we don't do this, we
   11764  * should at least detect the case and throw an error. However, so far all of
   11765  * our sources that emit mapentry messages do so canonically (with one key
   11766  * field, and then one value field), so this is not a pressing concern at the
   11767  * moment. */
   11768 void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) {
   11769   const upb_msgdef *md = upb_handlers_msgdef(h);
   11770 
   11771   /* A mapentry message is printed simply as '"key": value'. Rather than
   11772    * special-case key and value for every type below, we just handle both
   11773    * fields explicitly here. */
   11774   const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
   11775   const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
   11776 
   11777   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
   11778 
   11779   UPB_UNUSED(closure);
   11780 
   11781   switch (upb_fielddef_type(key_field)) {
   11782     case UPB_TYPE_INT32:
   11783       upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
   11784       break;
   11785     case UPB_TYPE_INT64:
   11786       upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
   11787       break;
   11788     case UPB_TYPE_UINT32:
   11789       upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
   11790       break;
   11791     case UPB_TYPE_UINT64:
   11792       upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
   11793       break;
   11794     case UPB_TYPE_BOOL:
   11795       upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
   11796       break;
   11797     case UPB_TYPE_STRING:
   11798       upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
   11799       upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
   11800       upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
   11801       break;
   11802     case UPB_TYPE_BYTES:
   11803       upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
   11804       break;
   11805     default:
   11806       assert(false);
   11807       break;
   11808   }
   11809 
   11810   switch (upb_fielddef_type(value_field)) {
   11811     case UPB_TYPE_INT32:
   11812       upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
   11813       break;
   11814     case UPB_TYPE_INT64:
   11815       upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
   11816       break;
   11817     case UPB_TYPE_UINT32:
   11818       upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
   11819       break;
   11820     case UPB_TYPE_UINT64:
   11821       upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
   11822       break;
   11823     case UPB_TYPE_BOOL:
   11824       upb_handlers_setbool(h, value_field, putbool, &empty_attr);
   11825       break;
   11826     case UPB_TYPE_FLOAT:
   11827       upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
   11828       break;
   11829     case UPB_TYPE_DOUBLE:
   11830       upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
   11831       break;
   11832     case UPB_TYPE_STRING:
   11833       upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
   11834       upb_handlers_setstring(h, value_field, putstr, &empty_attr);
   11835       upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
   11836       break;
   11837     case UPB_TYPE_BYTES:
   11838       upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
   11839       break;
   11840     case UPB_TYPE_ENUM: {
   11841       upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
   11842       set_enum_hd(h, value_field, &enum_attr);
   11843       upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
   11844       upb_handlerattr_uninit(&enum_attr);
   11845       break;
   11846     }
   11847     case UPB_TYPE_MESSAGE:
   11848       /* No handler necessary -- the submsg handlers will print the message
   11849        * as appropriate. */
   11850       break;
   11851   }
   11852 
   11853   upb_handlerattr_uninit(&empty_attr);
   11854 }
   11855 
   11856 void printer_sethandlers(const void *closure, upb_handlers *h) {
   11857   const upb_msgdef *md = upb_handlers_msgdef(h);
   11858   bool is_mapentry = upb_msgdef_mapentry(md);
   11859   upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER;
   11860   upb_msg_field_iter i;
   11861 
   11862   UPB_UNUSED(closure);
   11863 
   11864   if (is_mapentry) {
   11865     /* mapentry messages are sufficiently different that we handle them
   11866      * separately. */
   11867     printer_sethandlers_mapentry(closure, h);
   11868     return;
   11869   }
   11870 
   11871   upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
   11872   upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
   11873 
   11874 #define TYPE(type, name, ctype)                                               \
   11875   case type:                                                                  \
   11876     if (upb_fielddef_isseq(f)) {                                              \
   11877       upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
   11878     } else {                                                                  \
   11879       upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
   11880     }                                                                         \
   11881     break;
   11882 
   11883   upb_msg_field_begin(&i, md);
   11884   for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
   11885     const upb_fielddef *f = upb_msg_iter_field(&i);
   11886 
   11887     upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER;
   11888     upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f));
   11889 
   11890     if (upb_fielddef_ismap(f)) {
   11891       upb_handlers_setstartseq(h, f, startmap, &name_attr);
   11892       upb_handlers_setendseq(h, f, endmap, &name_attr);
   11893     } else if (upb_fielddef_isseq(f)) {
   11894       upb_handlers_setstartseq(h, f, startseq, &name_attr);
   11895       upb_handlers_setendseq(h, f, endseq, &empty_attr);
   11896     }
   11897 
   11898     switch (upb_fielddef_type(f)) {
   11899       TYPE(UPB_TYPE_FLOAT,  float,  float);
   11900       TYPE(UPB_TYPE_DOUBLE, double, double);
   11901       TYPE(UPB_TYPE_BOOL,   bool,   bool);
   11902       TYPE(UPB_TYPE_INT32,  int32,  int32_t);
   11903       TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
   11904       TYPE(UPB_TYPE_INT64,  int64,  int64_t);
   11905       TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
   11906       case UPB_TYPE_ENUM: {
   11907         /* For now, we always emit symbolic names for enums. We may want an
   11908          * option later to control this behavior, but we will wait for a real
   11909          * need first. */
   11910         upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER;
   11911         set_enum_hd(h, f, &enum_attr);
   11912 
   11913         if (upb_fielddef_isseq(f)) {
   11914           upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
   11915         } else {
   11916           upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
   11917         }
   11918 
   11919         upb_handlerattr_uninit(&enum_attr);
   11920         break;
   11921       }
   11922       case UPB_TYPE_STRING:
   11923         if (upb_fielddef_isseq(f)) {
   11924           upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
   11925           upb_handlers_setstring(h, f, repeated_str, &empty_attr);
   11926           upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
   11927         } else {
   11928           upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
   11929           upb_handlers_setstring(h, f, scalar_str, &empty_attr);
   11930           upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
   11931         }
   11932         break;
   11933       case UPB_TYPE_BYTES:
   11934         /* XXX: this doesn't support strings that span buffers yet. The base64
   11935          * encoder will need to be made resumable for this to work properly. */
   11936         if (upb_fielddef_isseq(f)) {
   11937           upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
   11938         } else {
   11939           upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
   11940         }
   11941         break;
   11942       case UPB_TYPE_MESSAGE:
   11943         if (upb_fielddef_isseq(f)) {
   11944           upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
   11945         } else {
   11946           upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
   11947         }
   11948         break;
   11949     }
   11950 
   11951     upb_handlerattr_uninit(&name_attr);
   11952   }
   11953 
   11954   upb_handlerattr_uninit(&empty_attr);
   11955 #undef TYPE
   11956 }
   11957 
/* Resets the printer's nesting depth to 0. Used by upb_json_printer_create()
 * when setting up a new printer. */
static void json_printer_reset(upb_json_printer *p) {
  p->depth_ = 0;
}
   11961 
   11962 
   11963 /* Public API *****************************************************************/
   11964 
   11965 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h,
   11966                                           upb_bytessink *output) {
   11967 #ifndef NDEBUG
   11968   size_t size_before = upb_env_bytesallocated(e);
   11969 #endif
   11970 
   11971   upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer));
   11972   if (!p) return NULL;
   11973 
   11974   p->output_ = output;
   11975   json_printer_reset(p);
   11976   upb_sink_reset(&p->input_, h, p);
   11977 
   11978   /* If this fails, increase the value in printer.h. */
   11979   assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE);
   11980   return p;
   11981 }
   11982 
   11983 upb_sink *upb_json_printer_input(upb_json_printer *p) {
   11984   return &p->input_;
   11985 }
   11986 
   11987 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md,
   11988                                                  const void *owner) {
   11989   return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL);
   11990 }
   11991