1 // Amalgamated source file 2 #include "upb.h" 3 4 5 #include <stdlib.h> 6 #include <string.h> 7 8 typedef struct { 9 size_t len; 10 char str[1]; /* Null-terminated string data follows. */ 11 } str_t; 12 13 static str_t *newstr(const char *data, size_t len) { 14 str_t *ret = malloc(sizeof(*ret) + len); 15 if (!ret) return NULL; 16 ret->len = len; 17 memcpy(ret->str, data, len); 18 ret->str[len] = '\0'; 19 return ret; 20 } 21 22 static void freestr(str_t *s) { free(s); } 23 24 /* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */ 25 static bool upb_isbetween(char c, char low, char high) { 26 return c >= low && c <= high; 27 } 28 29 static bool upb_isletter(char c) { 30 return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; 31 } 32 33 static bool upb_isalphanum(char c) { 34 return upb_isletter(c) || upb_isbetween(c, '0', '9'); 35 } 36 37 static bool upb_isident(const char *str, size_t len, bool full, upb_status *s) { 38 bool start = true; 39 size_t i; 40 for (i = 0; i < len; i++) { 41 char c = str[i]; 42 if (c == '.') { 43 if (start || !full) { 44 upb_status_seterrf(s, "invalid name: unexpected '.' 
(%s)", str); 45 return false; 46 } 47 start = true; 48 } else if (start) { 49 if (!upb_isletter(c)) { 50 upb_status_seterrf( 51 s, "invalid name: path components must start with a letter (%s)", 52 str); 53 return false; 54 } 55 start = false; 56 } else { 57 if (!upb_isalphanum(c)) { 58 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)", 59 str); 60 return false; 61 } 62 } 63 } 64 return !start; 65 } 66 67 68 /* upb_def ********************************************************************/ 69 70 upb_deftype_t upb_def_type(const upb_def *d) { return d->type; } 71 72 const char *upb_def_fullname(const upb_def *d) { return d->fullname; } 73 74 bool upb_def_setfullname(upb_def *def, const char *fullname, upb_status *s) { 75 assert(!upb_def_isfrozen(def)); 76 if (!upb_isident(fullname, strlen(fullname), true, s)) return false; 77 free((void*)def->fullname); 78 def->fullname = upb_strdup(fullname); 79 return true; 80 } 81 82 upb_def *upb_def_dup(const upb_def *def, const void *o) { 83 switch (def->type) { 84 case UPB_DEF_MSG: 85 return upb_msgdef_upcast_mutable( 86 upb_msgdef_dup(upb_downcast_msgdef(def), o)); 87 case UPB_DEF_FIELD: 88 return upb_fielddef_upcast_mutable( 89 upb_fielddef_dup(upb_downcast_fielddef(def), o)); 90 case UPB_DEF_ENUM: 91 return upb_enumdef_upcast_mutable( 92 upb_enumdef_dup(upb_downcast_enumdef(def), o)); 93 default: assert(false); return NULL; 94 } 95 } 96 97 static bool upb_def_init(upb_def *def, upb_deftype_t type, 98 const struct upb_refcounted_vtbl *vtbl, 99 const void *owner) { 100 if (!upb_refcounted_init(upb_def_upcast_mutable(def), vtbl, owner)) return false; 101 def->type = type; 102 def->fullname = NULL; 103 def->came_from_user = false; 104 return true; 105 } 106 107 static void upb_def_uninit(upb_def *def) { 108 free((void*)def->fullname); 109 } 110 111 static const char *msgdef_name(const upb_msgdef *m) { 112 const char *name = upb_def_fullname(upb_msgdef_upcast(m)); 113 return name ? 
name : "(anonymous)"; 114 } 115 116 static bool upb_validate_field(upb_fielddef *f, upb_status *s) { 117 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { 118 upb_status_seterrmsg(s, "fielddef must have name and number set"); 119 return false; 120 } 121 122 if (!f->type_is_set_) { 123 upb_status_seterrmsg(s, "fielddef type was not initialized"); 124 return false; 125 } 126 127 if (upb_fielddef_lazy(f) && 128 upb_fielddef_descriptortype(f) != UPB_DESCRIPTOR_TYPE_MESSAGE) { 129 upb_status_seterrmsg(s, 130 "only length-delimited submessage fields may be lazy"); 131 return false; 132 } 133 134 if (upb_fielddef_hassubdef(f)) { 135 const upb_def *subdef; 136 137 if (f->subdef_is_symbolic) { 138 upb_status_seterrf(s, "field '%s.%s' has not been resolved", 139 msgdef_name(f->msg.def), upb_fielddef_name(f)); 140 return false; 141 } 142 143 subdef = upb_fielddef_subdef(f); 144 if (subdef == NULL) { 145 upb_status_seterrf(s, "field %s.%s is missing required subdef", 146 msgdef_name(f->msg.def), upb_fielddef_name(f)); 147 return false; 148 } 149 150 if (!upb_def_isfrozen(subdef) && !subdef->came_from_user) { 151 upb_status_seterrf(s, 152 "subdef of field %s.%s is not frozen or being frozen", 153 msgdef_name(f->msg.def), upb_fielddef_name(f)); 154 return false; 155 } 156 } 157 158 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) { 159 bool has_default_name = upb_fielddef_enumhasdefaultstr(f); 160 bool has_default_number = upb_fielddef_enumhasdefaultint32(f); 161 162 /* Previously verified by upb_validate_enumdef(). */ 163 assert(upb_enumdef_numvals(upb_fielddef_enumsubdef(f)) > 0); 164 165 /* We've already validated that we have an associated enumdef and that it 166 * has at least one member, so at least one of these should be true. 167 * Because if the user didn't set anything, we'll pick up the enum's 168 * default, but if the user *did* set something we should at least pick up 169 * the one they set (int32 or string). 
*/ 170 assert(has_default_name || has_default_number); 171 172 if (!has_default_name) { 173 upb_status_seterrf(s, 174 "enum default for field %s.%s (%d) is not in the enum", 175 msgdef_name(f->msg.def), upb_fielddef_name(f), 176 upb_fielddef_defaultint32(f)); 177 return false; 178 } 179 180 if (!has_default_number) { 181 upb_status_seterrf(s, 182 "enum default for field %s.%s (%s) is not in the enum", 183 msgdef_name(f->msg.def), upb_fielddef_name(f), 184 upb_fielddef_defaultstr(f, NULL)); 185 return false; 186 } 187 188 /* Lift the effective numeric default into the field's default slot, in case 189 * we were only getting it "by reference" from the enumdef. */ 190 upb_fielddef_setdefaultint32(f, upb_fielddef_defaultint32(f)); 191 } 192 193 /* Ensure that MapEntry submessages only appear as repeated fields, not 194 * optional/required (singular) fields. */ 195 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && 196 upb_fielddef_msgsubdef(f) != NULL) { 197 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); 198 if (upb_msgdef_mapentry(subdef) && !upb_fielddef_isseq(f)) { 199 upb_status_seterrf(s, 200 "Field %s refers to mapentry message but is not " 201 "a repeated field", 202 upb_fielddef_name(f) ? upb_fielddef_name(f) : 203 "(unnamed)"); 204 return false; 205 } 206 } 207 208 return true; 209 } 210 211 static bool upb_validate_enumdef(const upb_enumdef *e, upb_status *s) { 212 if (upb_enumdef_numvals(e) == 0) { 213 upb_status_seterrf(s, "enum %s has no members (must have at least one)", 214 upb_enumdef_fullname(e)); 215 return false; 216 } 217 218 return true; 219 } 220 221 /* All submessage fields are lower than all other fields. 222 * Secondly, fields are increasing in order. 
*/ 223 uint32_t field_rank(const upb_fielddef *f) { 224 uint32_t ret = upb_fielddef_number(f); 225 const uint32_t high_bit = 1 << 30; 226 assert(ret < high_bit); 227 if (!upb_fielddef_issubmsg(f)) 228 ret |= high_bit; 229 return ret; 230 } 231 232 int cmp_fields(const void *p1, const void *p2) { 233 const upb_fielddef *f1 = *(upb_fielddef*const*)p1; 234 const upb_fielddef *f2 = *(upb_fielddef*const*)p2; 235 return field_rank(f1) - field_rank(f2); 236 } 237 238 static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { 239 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the 240 * lowest indexes, but we do not publicly guarantee this. */ 241 upb_msg_field_iter j; 242 int i; 243 uint32_t selector; 244 int n = upb_msgdef_numfields(m); 245 upb_fielddef **fields = malloc(n * sizeof(*fields)); 246 if (!fields) return false; 247 248 m->submsg_field_count = 0; 249 for(i = 0, upb_msg_field_begin(&j, m); 250 !upb_msg_field_done(&j); 251 upb_msg_field_next(&j), i++) { 252 upb_fielddef *f = upb_msg_iter_field(&j); 253 assert(f->msg.def == m); 254 if (!upb_validate_field(f, s)) { 255 free(fields); 256 return false; 257 } 258 if (upb_fielddef_issubmsg(f)) { 259 m->submsg_field_count++; 260 } 261 fields[i] = f; 262 } 263 264 qsort(fields, n, sizeof(*fields), cmp_fields); 265 266 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; 267 for (i = 0; i < n; i++) { 268 upb_fielddef *f = fields[i]; 269 f->index_ = i; 270 f->selector_base = selector + upb_handlers_selectorbaseoffset(f); 271 selector += upb_handlers_selectorcount(f); 272 } 273 m->selector_count = selector; 274 275 #ifndef NDEBUG 276 { 277 /* Verify that all selectors for the message are distinct. 
*/ 278 #define TRY(type) \ 279 if (upb_handlers_getselector(f, type, &sel)) upb_inttable_insert(&t, sel, v); 280 281 upb_inttable t; 282 upb_value v; 283 upb_selector_t sel; 284 285 upb_inttable_init(&t, UPB_CTYPE_BOOL); 286 v = upb_value_bool(true); 287 upb_inttable_insert(&t, UPB_STARTMSG_SELECTOR, v); 288 upb_inttable_insert(&t, UPB_ENDMSG_SELECTOR, v); 289 for(upb_msg_field_begin(&j, m); 290 !upb_msg_field_done(&j); 291 upb_msg_field_next(&j)) { 292 upb_fielddef *f = upb_msg_iter_field(&j); 293 /* These calls will assert-fail in upb_table if the value already 294 * exists. */ 295 TRY(UPB_HANDLER_INT32); 296 TRY(UPB_HANDLER_INT64) 297 TRY(UPB_HANDLER_UINT32) 298 TRY(UPB_HANDLER_UINT64) 299 TRY(UPB_HANDLER_FLOAT) 300 TRY(UPB_HANDLER_DOUBLE) 301 TRY(UPB_HANDLER_BOOL) 302 TRY(UPB_HANDLER_STARTSTR) 303 TRY(UPB_HANDLER_STRING) 304 TRY(UPB_HANDLER_ENDSTR) 305 TRY(UPB_HANDLER_STARTSUBMSG) 306 TRY(UPB_HANDLER_ENDSUBMSG) 307 TRY(UPB_HANDLER_STARTSEQ) 308 TRY(UPB_HANDLER_ENDSEQ) 309 } 310 upb_inttable_uninit(&t); 311 } 312 #undef TRY 313 #endif 314 315 free(fields); 316 return true; 317 } 318 319 bool upb_def_freeze(upb_def *const* defs, int n, upb_status *s) { 320 int i; 321 int maxdepth; 322 bool ret; 323 upb_status_clear(s); 324 325 /* First perform validation, in two passes so we can check that we have a 326 * transitive closure without needing to search. */ 327 for (i = 0; i < n; i++) { 328 upb_def *def = defs[i]; 329 if (upb_def_isfrozen(def)) { 330 /* Could relax this requirement if it's annoying. */ 331 upb_status_seterrmsg(s, "def is already frozen"); 332 goto err; 333 } else if (def->type == UPB_DEF_FIELD) { 334 upb_status_seterrmsg(s, "standalone fielddefs can not be frozen"); 335 goto err; 336 } else if (def->type == UPB_DEF_ENUM) { 337 if (!upb_validate_enumdef(upb_dyncast_enumdef(def), s)) { 338 goto err; 339 } 340 } else { 341 /* Set now to detect transitive closure in the second pass. 
*/ 342 def->came_from_user = true; 343 } 344 } 345 346 /* Second pass of validation. Also assign selector bases and indexes, and 347 * compact tables. */ 348 for (i = 0; i < n; i++) { 349 upb_msgdef *m = upb_dyncast_msgdef_mutable(defs[i]); 350 upb_enumdef *e = upb_dyncast_enumdef_mutable(defs[i]); 351 if (m) { 352 upb_inttable_compact(&m->itof); 353 if (!assign_msg_indices(m, s)) { 354 goto err; 355 } 356 } else if (e) { 357 upb_inttable_compact(&e->iton); 358 } 359 } 360 361 /* Def graph contains FieldDefs between each MessageDef, so double the 362 * limit. */ 363 maxdepth = UPB_MAX_MESSAGE_DEPTH * 2; 364 365 /* Validation all passed; freeze the defs. */ 366 ret = upb_refcounted_freeze((upb_refcounted * const *)defs, n, s, maxdepth); 367 assert(!(s && ret != upb_ok(s))); 368 return ret; 369 370 err: 371 for (i = 0; i < n; i++) { 372 defs[i]->came_from_user = false; 373 } 374 assert(!(s && upb_ok(s))); 375 return false; 376 } 377 378 379 /* upb_enumdef ****************************************************************/ 380 381 static void upb_enumdef_free(upb_refcounted *r) { 382 upb_enumdef *e = (upb_enumdef*)r; 383 upb_inttable_iter i; 384 upb_inttable_begin(&i, &e->iton); 385 for( ; !upb_inttable_done(&i); upb_inttable_next(&i)) { 386 /* To clean up the upb_strdup() from upb_enumdef_addval(). 
*/ 387 free(upb_value_getcstr(upb_inttable_iter_value(&i))); 388 } 389 upb_strtable_uninit(&e->ntoi); 390 upb_inttable_uninit(&e->iton); 391 upb_def_uninit(upb_enumdef_upcast_mutable(e)); 392 free(e); 393 } 394 395 upb_enumdef *upb_enumdef_new(const void *owner) { 396 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_enumdef_free}; 397 upb_enumdef *e = malloc(sizeof(*e)); 398 if (!e) return NULL; 399 if (!upb_def_init(upb_enumdef_upcast_mutable(e), UPB_DEF_ENUM, &vtbl, owner)) 400 goto err2; 401 if (!upb_strtable_init(&e->ntoi, UPB_CTYPE_INT32)) goto err2; 402 if (!upb_inttable_init(&e->iton, UPB_CTYPE_CSTR)) goto err1; 403 return e; 404 405 err1: 406 upb_strtable_uninit(&e->ntoi); 407 err2: 408 free(e); 409 return NULL; 410 } 411 412 upb_enumdef *upb_enumdef_dup(const upb_enumdef *e, const void *owner) { 413 upb_enum_iter i; 414 upb_enumdef *new_e = upb_enumdef_new(owner); 415 if (!new_e) return NULL; 416 for(upb_enum_begin(&i, e); !upb_enum_done(&i); upb_enum_next(&i)) { 417 bool success = upb_enumdef_addval( 418 new_e, upb_enum_iter_name(&i),upb_enum_iter_number(&i), NULL); 419 if (!success) { 420 upb_enumdef_unref(new_e, owner); 421 return NULL; 422 } 423 } 424 return new_e; 425 } 426 427 bool upb_enumdef_freeze(upb_enumdef *e, upb_status *status) { 428 upb_def *d = upb_enumdef_upcast_mutable(e); 429 return upb_def_freeze(&d, 1, status); 430 } 431 432 const char *upb_enumdef_fullname(const upb_enumdef *e) { 433 return upb_def_fullname(upb_enumdef_upcast(e)); 434 } 435 436 bool upb_enumdef_setfullname(upb_enumdef *e, const char *fullname, 437 upb_status *s) { 438 return upb_def_setfullname(upb_enumdef_upcast_mutable(e), fullname, s); 439 } 440 441 bool upb_enumdef_addval(upb_enumdef *e, const char *name, int32_t num, 442 upb_status *status) { 443 if (!upb_isident(name, strlen(name), false, status)) { 444 return false; 445 } 446 if (upb_enumdef_ntoiz(e, name, NULL)) { 447 upb_status_seterrf(status, "name '%s' is already defined", name); 448 return false; 
449 } 450 if (!upb_strtable_insert(&e->ntoi, name, upb_value_int32(num))) { 451 upb_status_seterrmsg(status, "out of memory"); 452 return false; 453 } 454 if (!upb_inttable_lookup(&e->iton, num, NULL) && 455 !upb_inttable_insert(&e->iton, num, upb_value_cstr(upb_strdup(name)))) { 456 upb_status_seterrmsg(status, "out of memory"); 457 upb_strtable_remove(&e->ntoi, name, NULL); 458 return false; 459 } 460 if (upb_enumdef_numvals(e) == 1) { 461 bool ok = upb_enumdef_setdefault(e, num, NULL); 462 UPB_ASSERT_VAR(ok, ok); 463 } 464 return true; 465 } 466 467 int32_t upb_enumdef_default(const upb_enumdef *e) { 468 assert(upb_enumdef_iton(e, e->defaultval)); 469 return e->defaultval; 470 } 471 472 bool upb_enumdef_setdefault(upb_enumdef *e, int32_t val, upb_status *s) { 473 assert(!upb_enumdef_isfrozen(e)); 474 if (!upb_enumdef_iton(e, val)) { 475 upb_status_seterrf(s, "number '%d' is not in the enum.", val); 476 return false; 477 } 478 e->defaultval = val; 479 return true; 480 } 481 482 int upb_enumdef_numvals(const upb_enumdef *e) { 483 return upb_strtable_count(&e->ntoi); 484 } 485 486 void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { 487 /* We iterate over the ntoi table, to account for duplicate numbers. */ 488 upb_strtable_begin(i, &e->ntoi); 489 } 490 491 void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } 492 bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } 493 494 bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, 495 size_t len, int32_t *num) { 496 upb_value v; 497 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { 498 return false; 499 } 500 if (num) *num = upb_value_getint32(v); 501 return true; 502 } 503 504 const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { 505 upb_value v; 506 return upb_inttable_lookup32(&def->iton, num, &v) ? 
507 upb_value_getcstr(v) : NULL; 508 } 509 510 const char *upb_enum_iter_name(upb_enum_iter *iter) { 511 return upb_strtable_iter_key(iter); 512 } 513 514 int32_t upb_enum_iter_number(upb_enum_iter *iter) { 515 return upb_value_getint32(upb_strtable_iter_value(iter)); 516 } 517 518 519 /* upb_fielddef ***************************************************************/ 520 521 static void upb_fielddef_init_default(upb_fielddef *f); 522 523 static void upb_fielddef_uninit_default(upb_fielddef *f) { 524 if (f->type_is_set_ && f->default_is_string && f->defaultval.bytes) 525 freestr(f->defaultval.bytes); 526 } 527 528 static void visitfield(const upb_refcounted *r, upb_refcounted_visit *visit, 529 void *closure) { 530 const upb_fielddef *f = (const upb_fielddef*)r; 531 if (upb_fielddef_containingtype(f)) { 532 visit(r, upb_msgdef_upcast2(upb_fielddef_containingtype(f)), closure); 533 } 534 if (upb_fielddef_containingoneof(f)) { 535 visit(r, upb_oneofdef_upcast2(upb_fielddef_containingoneof(f)), closure); 536 } 537 if (upb_fielddef_subdef(f)) { 538 visit(r, upb_def_upcast(upb_fielddef_subdef(f)), closure); 539 } 540 } 541 542 static void freefield(upb_refcounted *r) { 543 upb_fielddef *f = (upb_fielddef*)r; 544 upb_fielddef_uninit_default(f); 545 if (f->subdef_is_symbolic) 546 free(f->sub.name); 547 upb_def_uninit(upb_fielddef_upcast_mutable(f)); 548 free(f); 549 } 550 551 static const char *enumdefaultstr(const upb_fielddef *f) { 552 const upb_enumdef *e; 553 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM); 554 e = upb_fielddef_enumsubdef(f); 555 if (f->default_is_string && f->defaultval.bytes) { 556 /* Default was explicitly set as a string. */ 557 str_t *s = f->defaultval.bytes; 558 return s->str; 559 } else if (e) { 560 if (!f->default_is_string) { 561 /* Default was explicitly set as an integer; look it up in enumdef. 
*/ 562 const char *name = upb_enumdef_iton(e, f->defaultval.sint); 563 if (name) { 564 return name; 565 } 566 } else { 567 /* Default is completely unset; pull enumdef default. */ 568 if (upb_enumdef_numvals(e) > 0) { 569 const char *name = upb_enumdef_iton(e, upb_enumdef_default(e)); 570 assert(name); 571 return name; 572 } 573 } 574 } 575 return NULL; 576 } 577 578 static bool enumdefaultint32(const upb_fielddef *f, int32_t *val) { 579 const upb_enumdef *e; 580 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM); 581 e = upb_fielddef_enumsubdef(f); 582 if (!f->default_is_string) { 583 /* Default was explicitly set as an integer. */ 584 *val = f->defaultval.sint; 585 return true; 586 } else if (e) { 587 if (f->defaultval.bytes) { 588 /* Default was explicitly set as a str; try to lookup corresponding int. */ 589 str_t *s = f->defaultval.bytes; 590 if (upb_enumdef_ntoiz(e, s->str, val)) { 591 return true; 592 } 593 } else { 594 /* Default is unset; try to pull in enumdef default. */ 595 if (upb_enumdef_numvals(e) > 0) { 596 *val = upb_enumdef_default(e); 597 return true; 598 } 599 } 600 } 601 return false; 602 } 603 604 upb_fielddef *upb_fielddef_new(const void *o) { 605 static const struct upb_refcounted_vtbl vtbl = {visitfield, freefield}; 606 upb_fielddef *f = malloc(sizeof(*f)); 607 if (!f) return NULL; 608 if (!upb_def_init(upb_fielddef_upcast_mutable(f), UPB_DEF_FIELD, &vtbl, o)) { 609 free(f); 610 return NULL; 611 } 612 f->msg.def = NULL; 613 f->sub.def = NULL; 614 f->oneof = NULL; 615 f->subdef_is_symbolic = false; 616 f->msg_is_symbolic = false; 617 f->label_ = UPB_LABEL_OPTIONAL; 618 f->type_ = UPB_TYPE_INT32; 619 f->number_ = 0; 620 f->type_is_set_ = false; 621 f->tagdelim = false; 622 f->is_extension_ = false; 623 f->lazy_ = false; 624 f->packed_ = true; 625 626 /* For the moment we default this to UPB_INTFMT_VARIABLE, since it will work 627 * with all integer types and is in some since more "default" since the most 628 * normal-looking proto2 types 
int32/int64/uint32/uint64 use variable. 629 * 630 * Other options to consider: 631 * - there is no default; users must set this manually (like type). 632 * - default signed integers to UPB_INTFMT_ZIGZAG, since it's more likely to 633 * be an optimal default for signed integers. */ 634 f->intfmt = UPB_INTFMT_VARIABLE; 635 return f; 636 } 637 638 upb_fielddef *upb_fielddef_dup(const upb_fielddef *f, const void *owner) { 639 const char *srcname; 640 upb_fielddef *newf = upb_fielddef_new(owner); 641 if (!newf) return NULL; 642 upb_fielddef_settype(newf, upb_fielddef_type(f)); 643 upb_fielddef_setlabel(newf, upb_fielddef_label(f)); 644 upb_fielddef_setnumber(newf, upb_fielddef_number(f), NULL); 645 upb_fielddef_setname(newf, upb_fielddef_name(f), NULL); 646 if (f->default_is_string && f->defaultval.bytes) { 647 str_t *s = f->defaultval.bytes; 648 upb_fielddef_setdefaultstr(newf, s->str, s->len, NULL); 649 } else { 650 newf->default_is_string = f->default_is_string; 651 newf->defaultval = f->defaultval; 652 } 653 654 if (f->subdef_is_symbolic) { 655 srcname = f->sub.name; /* Might be NULL. */ 656 } else { 657 srcname = f->sub.def ? 
upb_def_fullname(f->sub.def) : NULL; 658 } 659 if (srcname) { 660 char *newname = malloc(strlen(f->sub.def->fullname) + 2); 661 if (!newname) { 662 upb_fielddef_unref(newf, owner); 663 return NULL; 664 } 665 strcpy(newname, "."); 666 strcat(newname, f->sub.def->fullname); 667 upb_fielddef_setsubdefname(newf, newname, NULL); 668 free(newname); 669 } 670 671 return newf; 672 } 673 674 bool upb_fielddef_typeisset(const upb_fielddef *f) { 675 return f->type_is_set_; 676 } 677 678 upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { 679 assert(f->type_is_set_); 680 return f->type_; 681 } 682 683 uint32_t upb_fielddef_index(const upb_fielddef *f) { 684 return f->index_; 685 } 686 687 upb_label_t upb_fielddef_label(const upb_fielddef *f) { 688 return f->label_; 689 } 690 691 upb_intfmt_t upb_fielddef_intfmt(const upb_fielddef *f) { 692 return f->intfmt; 693 } 694 695 bool upb_fielddef_istagdelim(const upb_fielddef *f) { 696 return f->tagdelim; 697 } 698 699 uint32_t upb_fielddef_number(const upb_fielddef *f) { 700 return f->number_; 701 } 702 703 bool upb_fielddef_isextension(const upb_fielddef *f) { 704 return f->is_extension_; 705 } 706 707 bool upb_fielddef_lazy(const upb_fielddef *f) { 708 return f->lazy_; 709 } 710 711 bool upb_fielddef_packed(const upb_fielddef *f) { 712 return f->packed_; 713 } 714 715 const char *upb_fielddef_name(const upb_fielddef *f) { 716 return upb_def_fullname(upb_fielddef_upcast(f)); 717 } 718 719 const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { 720 return f->msg_is_symbolic ? NULL : f->msg.def; 721 } 722 723 const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { 724 return f->oneof; 725 } 726 727 upb_msgdef *upb_fielddef_containingtype_mutable(upb_fielddef *f) { 728 return (upb_msgdef*)upb_fielddef_containingtype(f); 729 } 730 731 const char *upb_fielddef_containingtypename(upb_fielddef *f) { 732 return f->msg_is_symbolic ? 
f->msg.name : NULL; 733 } 734 735 static void release_containingtype(upb_fielddef *f) { 736 if (f->msg_is_symbolic) free(f->msg.name); 737 } 738 739 bool upb_fielddef_setcontainingtypename(upb_fielddef *f, const char *name, 740 upb_status *s) { 741 assert(!upb_fielddef_isfrozen(f)); 742 if (upb_fielddef_containingtype(f)) { 743 upb_status_seterrmsg(s, "field has already been added to a message."); 744 return false; 745 } 746 /* TODO: validate name (upb_isident() doesn't quite work atm because this name 747 * may have a leading "."). */ 748 release_containingtype(f); 749 f->msg.name = upb_strdup(name); 750 f->msg_is_symbolic = true; 751 return true; 752 } 753 754 bool upb_fielddef_setname(upb_fielddef *f, const char *name, upb_status *s) { 755 if (upb_fielddef_containingtype(f) || upb_fielddef_containingoneof(f)) { 756 upb_status_seterrmsg(s, "Already added to message or oneof"); 757 return false; 758 } 759 return upb_def_setfullname(upb_fielddef_upcast_mutable(f), name, s); 760 } 761 762 static void chkdefaulttype(const upb_fielddef *f, upb_fieldtype_t type) { 763 UPB_UNUSED(f); 764 UPB_UNUSED(type); 765 assert(f->type_is_set_ && upb_fielddef_type(f) == type); 766 } 767 768 int64_t upb_fielddef_defaultint64(const upb_fielddef *f) { 769 chkdefaulttype(f, UPB_TYPE_INT64); 770 return f->defaultval.sint; 771 } 772 773 int32_t upb_fielddef_defaultint32(const upb_fielddef *f) { 774 if (f->type_is_set_ && upb_fielddef_type(f) == UPB_TYPE_ENUM) { 775 int32_t val; 776 bool ok = enumdefaultint32(f, &val); 777 UPB_ASSERT_VAR(ok, ok); 778 return val; 779 } else { 780 chkdefaulttype(f, UPB_TYPE_INT32); 781 return f->defaultval.sint; 782 } 783 } 784 785 uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) { 786 chkdefaulttype(f, UPB_TYPE_UINT64); 787 return f->defaultval.uint; 788 } 789 790 uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) { 791 chkdefaulttype(f, UPB_TYPE_UINT32); 792 return f->defaultval.uint; 793 } 794 795 bool upb_fielddef_defaultbool(const 
upb_fielddef *f) { 796 chkdefaulttype(f, UPB_TYPE_BOOL); 797 return f->defaultval.uint; 798 } 799 800 float upb_fielddef_defaultfloat(const upb_fielddef *f) { 801 chkdefaulttype(f, UPB_TYPE_FLOAT); 802 return f->defaultval.flt; 803 } 804 805 double upb_fielddef_defaultdouble(const upb_fielddef *f) { 806 chkdefaulttype(f, UPB_TYPE_DOUBLE); 807 return f->defaultval.dbl; 808 } 809 810 const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) { 811 assert(f->type_is_set_); 812 assert(upb_fielddef_type(f) == UPB_TYPE_STRING || 813 upb_fielddef_type(f) == UPB_TYPE_BYTES || 814 upb_fielddef_type(f) == UPB_TYPE_ENUM); 815 816 if (upb_fielddef_type(f) == UPB_TYPE_ENUM) { 817 const char *ret = enumdefaultstr(f); 818 assert(ret); 819 /* Enum defaults can't have embedded NULLs. */ 820 if (len) *len = strlen(ret); 821 return ret; 822 } 823 824 if (f->default_is_string) { 825 str_t *str = f->defaultval.bytes; 826 if (len) *len = str->len; 827 return str->str; 828 } 829 830 return NULL; 831 } 832 833 static void upb_fielddef_init_default(upb_fielddef *f) { 834 f->default_is_string = false; 835 switch (upb_fielddef_type(f)) { 836 case UPB_TYPE_DOUBLE: f->defaultval.dbl = 0; break; 837 case UPB_TYPE_FLOAT: f->defaultval.flt = 0; break; 838 case UPB_TYPE_INT32: 839 case UPB_TYPE_INT64: f->defaultval.sint = 0; break; 840 case UPB_TYPE_UINT64: 841 case UPB_TYPE_UINT32: 842 case UPB_TYPE_BOOL: f->defaultval.uint = 0; break; 843 case UPB_TYPE_STRING: 844 case UPB_TYPE_BYTES: 845 f->defaultval.bytes = newstr("", 0); 846 f->default_is_string = true; 847 break; 848 case UPB_TYPE_MESSAGE: break; 849 case UPB_TYPE_ENUM: 850 /* This is our special sentinel that indicates "not set" for an enum. */ 851 f->default_is_string = true; 852 f->defaultval.bytes = NULL; 853 break; 854 } 855 } 856 857 const upb_def *upb_fielddef_subdef(const upb_fielddef *f) { 858 return f->subdef_is_symbolic ? 
NULL : f->sub.def; 859 } 860 861 const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) { 862 const upb_def *def = upb_fielddef_subdef(f); 863 return def ? upb_dyncast_msgdef(def) : NULL; 864 } 865 866 const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) { 867 const upb_def *def = upb_fielddef_subdef(f); 868 return def ? upb_dyncast_enumdef(def) : NULL; 869 } 870 871 upb_def *upb_fielddef_subdef_mutable(upb_fielddef *f) { 872 return (upb_def*)upb_fielddef_subdef(f); 873 } 874 875 const char *upb_fielddef_subdefname(const upb_fielddef *f) { 876 if (f->subdef_is_symbolic) { 877 return f->sub.name; 878 } else if (f->sub.def) { 879 return upb_def_fullname(f->sub.def); 880 } else { 881 return NULL; 882 } 883 } 884 885 bool upb_fielddef_setnumber(upb_fielddef *f, uint32_t number, upb_status *s) { 886 if (upb_fielddef_containingtype(f)) { 887 upb_status_seterrmsg( 888 s, "cannot change field number after adding to a message"); 889 return false; 890 } 891 if (number == 0 || number > UPB_MAX_FIELDNUMBER) { 892 upb_status_seterrf(s, "invalid field number (%u)", number); 893 return false; 894 } 895 f->number_ = number; 896 return true; 897 } 898 899 void upb_fielddef_settype(upb_fielddef *f, upb_fieldtype_t type) { 900 assert(!upb_fielddef_isfrozen(f)); 901 assert(upb_fielddef_checktype(type)); 902 upb_fielddef_uninit_default(f); 903 f->type_ = type; 904 f->type_is_set_ = true; 905 upb_fielddef_init_default(f); 906 } 907 908 void upb_fielddef_setdescriptortype(upb_fielddef *f, int type) { 909 assert(!upb_fielddef_isfrozen(f)); 910 switch (type) { 911 case UPB_DESCRIPTOR_TYPE_DOUBLE: 912 upb_fielddef_settype(f, UPB_TYPE_DOUBLE); 913 break; 914 case UPB_DESCRIPTOR_TYPE_FLOAT: 915 upb_fielddef_settype(f, UPB_TYPE_FLOAT); 916 break; 917 case UPB_DESCRIPTOR_TYPE_INT64: 918 case UPB_DESCRIPTOR_TYPE_SFIXED64: 919 case UPB_DESCRIPTOR_TYPE_SINT64: 920 upb_fielddef_settype(f, UPB_TYPE_INT64); 921 break; 922 case UPB_DESCRIPTOR_TYPE_UINT64: 923 case 
UPB_DESCRIPTOR_TYPE_FIXED64: 924 upb_fielddef_settype(f, UPB_TYPE_UINT64); 925 break; 926 case UPB_DESCRIPTOR_TYPE_INT32: 927 case UPB_DESCRIPTOR_TYPE_SFIXED32: 928 case UPB_DESCRIPTOR_TYPE_SINT32: 929 upb_fielddef_settype(f, UPB_TYPE_INT32); 930 break; 931 case UPB_DESCRIPTOR_TYPE_UINT32: 932 case UPB_DESCRIPTOR_TYPE_FIXED32: 933 upb_fielddef_settype(f, UPB_TYPE_UINT32); 934 break; 935 case UPB_DESCRIPTOR_TYPE_BOOL: 936 upb_fielddef_settype(f, UPB_TYPE_BOOL); 937 break; 938 case UPB_DESCRIPTOR_TYPE_STRING: 939 upb_fielddef_settype(f, UPB_TYPE_STRING); 940 break; 941 case UPB_DESCRIPTOR_TYPE_BYTES: 942 upb_fielddef_settype(f, UPB_TYPE_BYTES); 943 break; 944 case UPB_DESCRIPTOR_TYPE_GROUP: 945 case UPB_DESCRIPTOR_TYPE_MESSAGE: 946 upb_fielddef_settype(f, UPB_TYPE_MESSAGE); 947 break; 948 case UPB_DESCRIPTOR_TYPE_ENUM: 949 upb_fielddef_settype(f, UPB_TYPE_ENUM); 950 break; 951 default: assert(false); 952 } 953 954 if (type == UPB_DESCRIPTOR_TYPE_FIXED64 || 955 type == UPB_DESCRIPTOR_TYPE_FIXED32 || 956 type == UPB_DESCRIPTOR_TYPE_SFIXED64 || 957 type == UPB_DESCRIPTOR_TYPE_SFIXED32) { 958 upb_fielddef_setintfmt(f, UPB_INTFMT_FIXED); 959 } else if (type == UPB_DESCRIPTOR_TYPE_SINT64 || 960 type == UPB_DESCRIPTOR_TYPE_SINT32) { 961 upb_fielddef_setintfmt(f, UPB_INTFMT_ZIGZAG); 962 } else { 963 upb_fielddef_setintfmt(f, UPB_INTFMT_VARIABLE); 964 } 965 966 upb_fielddef_settagdelim(f, type == UPB_DESCRIPTOR_TYPE_GROUP); 967 } 968 969 upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) { 970 switch (upb_fielddef_type(f)) { 971 case UPB_TYPE_FLOAT: return UPB_DESCRIPTOR_TYPE_FLOAT; 972 case UPB_TYPE_DOUBLE: return UPB_DESCRIPTOR_TYPE_DOUBLE; 973 case UPB_TYPE_BOOL: return UPB_DESCRIPTOR_TYPE_BOOL; 974 case UPB_TYPE_STRING: return UPB_DESCRIPTOR_TYPE_STRING; 975 case UPB_TYPE_BYTES: return UPB_DESCRIPTOR_TYPE_BYTES; 976 case UPB_TYPE_ENUM: return UPB_DESCRIPTOR_TYPE_ENUM; 977 case UPB_TYPE_INT32: 978 switch (upb_fielddef_intfmt(f)) { 979 case 
UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT32; 980 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED32; 981 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT32; 982 } 983 case UPB_TYPE_INT64: 984 switch (upb_fielddef_intfmt(f)) { 985 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_INT64; 986 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_SFIXED64; 987 case UPB_INTFMT_ZIGZAG: return UPB_DESCRIPTOR_TYPE_SINT64; 988 } 989 case UPB_TYPE_UINT32: 990 switch (upb_fielddef_intfmt(f)) { 991 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT32; 992 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED32; 993 case UPB_INTFMT_ZIGZAG: return -1; 994 } 995 case UPB_TYPE_UINT64: 996 switch (upb_fielddef_intfmt(f)) { 997 case UPB_INTFMT_VARIABLE: return UPB_DESCRIPTOR_TYPE_UINT64; 998 case UPB_INTFMT_FIXED: return UPB_DESCRIPTOR_TYPE_FIXED64; 999 case UPB_INTFMT_ZIGZAG: return -1; 1000 } 1001 case UPB_TYPE_MESSAGE: 1002 return upb_fielddef_istagdelim(f) ? 1003 UPB_DESCRIPTOR_TYPE_GROUP : UPB_DESCRIPTOR_TYPE_MESSAGE; 1004 } 1005 return 0; 1006 } 1007 1008 void upb_fielddef_setisextension(upb_fielddef *f, bool is_extension) { 1009 assert(!upb_fielddef_isfrozen(f)); 1010 f->is_extension_ = is_extension; 1011 } 1012 1013 void upb_fielddef_setlazy(upb_fielddef *f, bool lazy) { 1014 assert(!upb_fielddef_isfrozen(f)); 1015 f->lazy_ = lazy; 1016 } 1017 1018 void upb_fielddef_setpacked(upb_fielddef *f, bool packed) { 1019 assert(!upb_fielddef_isfrozen(f)); 1020 f->packed_ = packed; 1021 } 1022 1023 void upb_fielddef_setlabel(upb_fielddef *f, upb_label_t label) { 1024 assert(!upb_fielddef_isfrozen(f)); 1025 assert(upb_fielddef_checklabel(label)); 1026 f->label_ = label; 1027 } 1028 1029 void upb_fielddef_setintfmt(upb_fielddef *f, upb_intfmt_t fmt) { 1030 assert(!upb_fielddef_isfrozen(f)); 1031 assert(upb_fielddef_checkintfmt(fmt)); 1032 f->intfmt = fmt; 1033 } 1034 1035 void upb_fielddef_settagdelim(upb_fielddef *f, bool tag_delim) { 1036 
assert(!upb_fielddef_isfrozen(f)); 1037 f->tagdelim = tag_delim; 1038 f->tagdelim = tag_delim; 1039 } 1040 1041 static bool checksetdefault(upb_fielddef *f, upb_fieldtype_t type) { 1042 if (!f->type_is_set_ || upb_fielddef_isfrozen(f) || 1043 upb_fielddef_type(f) != type) { 1044 assert(false); 1045 return false; 1046 } 1047 if (f->default_is_string) { 1048 str_t *s = f->defaultval.bytes; 1049 assert(s || type == UPB_TYPE_ENUM); 1050 if (s) freestr(s); 1051 } 1052 f->default_is_string = false; 1053 return true; 1054 } 1055 1056 void upb_fielddef_setdefaultint64(upb_fielddef *f, int64_t value) { 1057 if (checksetdefault(f, UPB_TYPE_INT64)) 1058 f->defaultval.sint = value; 1059 } 1060 1061 void upb_fielddef_setdefaultint32(upb_fielddef *f, int32_t value) { 1062 if ((upb_fielddef_type(f) == UPB_TYPE_ENUM && 1063 checksetdefault(f, UPB_TYPE_ENUM)) || 1064 checksetdefault(f, UPB_TYPE_INT32)) { 1065 f->defaultval.sint = value; 1066 } 1067 } 1068 1069 void upb_fielddef_setdefaultuint64(upb_fielddef *f, uint64_t value) { 1070 if (checksetdefault(f, UPB_TYPE_UINT64)) 1071 f->defaultval.uint = value; 1072 } 1073 1074 void upb_fielddef_setdefaultuint32(upb_fielddef *f, uint32_t value) { 1075 if (checksetdefault(f, UPB_TYPE_UINT32)) 1076 f->defaultval.uint = value; 1077 } 1078 1079 void upb_fielddef_setdefaultbool(upb_fielddef *f, bool value) { 1080 if (checksetdefault(f, UPB_TYPE_BOOL)) 1081 f->defaultval.uint = value; 1082 } 1083 1084 void upb_fielddef_setdefaultfloat(upb_fielddef *f, float value) { 1085 if (checksetdefault(f, UPB_TYPE_FLOAT)) 1086 f->defaultval.flt = value; 1087 } 1088 1089 void upb_fielddef_setdefaultdouble(upb_fielddef *f, double value) { 1090 if (checksetdefault(f, UPB_TYPE_DOUBLE)) 1091 f->defaultval.dbl = value; 1092 } 1093 1094 bool upb_fielddef_setdefaultstr(upb_fielddef *f, const void *str, size_t len, 1095 upb_status *s) { 1096 str_t *str2; 1097 assert(upb_fielddef_isstring(f) || f->type_ == UPB_TYPE_ENUM); 1098 if (f->type_ == UPB_TYPE_ENUM && 
!upb_isident(str, len, false, s)) 1099 return false; 1100 1101 if (f->default_is_string) { 1102 str_t *s = f->defaultval.bytes; 1103 assert(s || f->type_ == UPB_TYPE_ENUM); 1104 if (s) freestr(s); 1105 } else { 1106 assert(f->type_ == UPB_TYPE_ENUM); 1107 } 1108 1109 str2 = newstr(str, len); 1110 f->defaultval.bytes = str2; 1111 f->default_is_string = true; 1112 return true; 1113 } 1114 1115 void upb_fielddef_setdefaultcstr(upb_fielddef *f, const char *str, 1116 upb_status *s) { 1117 assert(f->type_is_set_); 1118 upb_fielddef_setdefaultstr(f, str, str ? strlen(str) : 0, s); 1119 } 1120 1121 bool upb_fielddef_enumhasdefaultint32(const upb_fielddef *f) { 1122 int32_t val; 1123 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM); 1124 return enumdefaultint32(f, &val); 1125 } 1126 1127 bool upb_fielddef_enumhasdefaultstr(const upb_fielddef *f) { 1128 assert(f->type_is_set_ && f->type_ == UPB_TYPE_ENUM); 1129 return enumdefaultstr(f) != NULL; 1130 } 1131 1132 static bool upb_subdef_typecheck(upb_fielddef *f, const upb_def *subdef, 1133 upb_status *s) { 1134 if (f->type_ == UPB_TYPE_MESSAGE) { 1135 if (upb_dyncast_msgdef(subdef)) return true; 1136 upb_status_seterrmsg(s, "invalid subdef type for this submessage field"); 1137 return false; 1138 } else if (f->type_ == UPB_TYPE_ENUM) { 1139 if (upb_dyncast_enumdef(subdef)) return true; 1140 upb_status_seterrmsg(s, "invalid subdef type for this enum field"); 1141 return false; 1142 } else { 1143 upb_status_seterrmsg(s, "only message and enum fields can have a subdef"); 1144 return false; 1145 } 1146 } 1147 1148 static void release_subdef(upb_fielddef *f) { 1149 if (f->subdef_is_symbolic) { 1150 free(f->sub.name); 1151 } else if (f->sub.def) { 1152 upb_unref2(f->sub.def, f); 1153 } 1154 } 1155 1156 bool upb_fielddef_setsubdef(upb_fielddef *f, const upb_def *subdef, 1157 upb_status *s) { 1158 assert(!upb_fielddef_isfrozen(f)); 1159 assert(upb_fielddef_hassubdef(f)); 1160 if (subdef && !upb_subdef_typecheck(f, subdef, s)) 
return false; 1161 release_subdef(f); 1162 f->sub.def = subdef; 1163 f->subdef_is_symbolic = false; 1164 if (f->sub.def) upb_ref2(f->sub.def, f); 1165 return true; 1166 } 1167 1168 bool upb_fielddef_setmsgsubdef(upb_fielddef *f, const upb_msgdef *subdef, 1169 upb_status *s) { 1170 return upb_fielddef_setsubdef(f, upb_msgdef_upcast(subdef), s); 1171 } 1172 1173 bool upb_fielddef_setenumsubdef(upb_fielddef *f, const upb_enumdef *subdef, 1174 upb_status *s) { 1175 return upb_fielddef_setsubdef(f, upb_enumdef_upcast(subdef), s); 1176 } 1177 1178 bool upb_fielddef_setsubdefname(upb_fielddef *f, const char *name, 1179 upb_status *s) { 1180 assert(!upb_fielddef_isfrozen(f)); 1181 if (!upb_fielddef_hassubdef(f)) { 1182 upb_status_seterrmsg(s, "field type does not accept a subdef"); 1183 return false; 1184 } 1185 /* TODO: validate name (upb_isident() doesn't quite work atm because this name 1186 * may have a leading "."). */ 1187 release_subdef(f); 1188 f->sub.name = upb_strdup(name); 1189 f->subdef_is_symbolic = true; 1190 return true; 1191 } 1192 1193 bool upb_fielddef_issubmsg(const upb_fielddef *f) { 1194 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; 1195 } 1196 1197 bool upb_fielddef_isstring(const upb_fielddef *f) { 1198 return upb_fielddef_type(f) == UPB_TYPE_STRING || 1199 upb_fielddef_type(f) == UPB_TYPE_BYTES; 1200 } 1201 1202 bool upb_fielddef_isseq(const upb_fielddef *f) { 1203 return upb_fielddef_label(f) == UPB_LABEL_REPEATED; 1204 } 1205 1206 bool upb_fielddef_isprimitive(const upb_fielddef *f) { 1207 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); 1208 } 1209 1210 bool upb_fielddef_ismap(const upb_fielddef *f) { 1211 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && 1212 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); 1213 } 1214 1215 bool upb_fielddef_hassubdef(const upb_fielddef *f) { 1216 return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM; 1217 } 1218 1219 static bool between(int32_t x, int32_t low, 
int32_t high) { 1220 return x >= low && x <= high; 1221 } 1222 1223 bool upb_fielddef_checklabel(int32_t label) { return between(label, 1, 3); } 1224 bool upb_fielddef_checktype(int32_t type) { return between(type, 1, 11); } 1225 bool upb_fielddef_checkintfmt(int32_t fmt) { return between(fmt, 1, 3); } 1226 1227 bool upb_fielddef_checkdescriptortype(int32_t type) { 1228 return between(type, 1, 18); 1229 } 1230 1231 /* upb_msgdef *****************************************************************/ 1232 1233 static void visitmsg(const upb_refcounted *r, upb_refcounted_visit *visit, 1234 void *closure) { 1235 upb_msg_oneof_iter o; 1236 const upb_msgdef *m = (const upb_msgdef*)r; 1237 upb_msg_field_iter i; 1238 for(upb_msg_field_begin(&i, m); 1239 !upb_msg_field_done(&i); 1240 upb_msg_field_next(&i)) { 1241 upb_fielddef *f = upb_msg_iter_field(&i); 1242 visit(r, upb_fielddef_upcast2(f), closure); 1243 } 1244 for(upb_msg_oneof_begin(&o, m); 1245 !upb_msg_oneof_done(&o); 1246 upb_msg_oneof_next(&o)) { 1247 upb_oneofdef *f = upb_msg_iter_oneof(&o); 1248 visit(r, upb_oneofdef_upcast2(f), closure); 1249 } 1250 } 1251 1252 static void freemsg(upb_refcounted *r) { 1253 upb_msgdef *m = (upb_msgdef*)r; 1254 upb_strtable_uninit(&m->ntoo); 1255 upb_strtable_uninit(&m->ntof); 1256 upb_inttable_uninit(&m->itof); 1257 upb_def_uninit(upb_msgdef_upcast_mutable(m)); 1258 free(m); 1259 } 1260 1261 upb_msgdef *upb_msgdef_new(const void *owner) { 1262 static const struct upb_refcounted_vtbl vtbl = {visitmsg, freemsg}; 1263 upb_msgdef *m = malloc(sizeof(*m)); 1264 if (!m) return NULL; 1265 if (!upb_def_init(upb_msgdef_upcast_mutable(m), UPB_DEF_MSG, &vtbl, owner)) 1266 goto err2; 1267 if (!upb_inttable_init(&m->itof, UPB_CTYPE_PTR)) goto err3; 1268 if (!upb_strtable_init(&m->ntof, UPB_CTYPE_PTR)) goto err2; 1269 if (!upb_strtable_init(&m->ntoo, UPB_CTYPE_PTR)) goto err1; 1270 m->map_entry = false; 1271 return m; 1272 1273 err1: 1274 upb_strtable_uninit(&m->ntof); 1275 err2: 1276 
upb_inttable_uninit(&m->itof); 1277 err3: 1278 free(m); 1279 return NULL; 1280 } 1281 1282 upb_msgdef *upb_msgdef_dup(const upb_msgdef *m, const void *owner) { 1283 bool ok; 1284 upb_msg_field_iter i; 1285 upb_msg_oneof_iter o; 1286 1287 upb_msgdef *newm = upb_msgdef_new(owner); 1288 if (!newm) return NULL; 1289 ok = upb_def_setfullname(upb_msgdef_upcast_mutable(newm), 1290 upb_def_fullname(upb_msgdef_upcast(m)), 1291 NULL); 1292 newm->map_entry = m->map_entry; 1293 UPB_ASSERT_VAR(ok, ok); 1294 for(upb_msg_field_begin(&i, m); 1295 !upb_msg_field_done(&i); 1296 upb_msg_field_next(&i)) { 1297 upb_fielddef *f = upb_fielddef_dup(upb_msg_iter_field(&i), &f); 1298 /* Fields in oneofs are dup'd below. */ 1299 if (upb_fielddef_containingoneof(f)) continue; 1300 if (!f || !upb_msgdef_addfield(newm, f, &f, NULL)) { 1301 upb_msgdef_unref(newm, owner); 1302 return NULL; 1303 } 1304 } 1305 for(upb_msg_oneof_begin(&o, m); 1306 !upb_msg_oneof_done(&o); 1307 upb_msg_oneof_next(&o)) { 1308 upb_oneofdef *f = upb_oneofdef_dup(upb_msg_iter_oneof(&o), &f); 1309 if (!f || !upb_msgdef_addoneof(newm, f, &f, NULL)) { 1310 upb_msgdef_unref(newm, owner); 1311 return NULL; 1312 } 1313 } 1314 return newm; 1315 } 1316 1317 bool upb_msgdef_freeze(upb_msgdef *m, upb_status *status) { 1318 upb_def *d = upb_msgdef_upcast_mutable(m); 1319 return upb_def_freeze(&d, 1, status); 1320 } 1321 1322 const char *upb_msgdef_fullname(const upb_msgdef *m) { 1323 return upb_def_fullname(upb_msgdef_upcast(m)); 1324 } 1325 1326 bool upb_msgdef_setfullname(upb_msgdef *m, const char *fullname, 1327 upb_status *s) { 1328 return upb_def_setfullname(upb_msgdef_upcast_mutable(m), fullname, s); 1329 } 1330 1331 /* Helper: check that the field |f| is safe to add to msgdef |m|. Set an error 1332 * on status |s| and return false if not. 
*/ 1333 static bool check_field_add(const upb_msgdef *m, const upb_fielddef *f, 1334 upb_status *s) { 1335 if (upb_fielddef_containingtype(f) != NULL) { 1336 upb_status_seterrmsg(s, "fielddef already belongs to a message"); 1337 return false; 1338 } else if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { 1339 upb_status_seterrmsg(s, "field name or number were not set"); 1340 return false; 1341 } else if (upb_msgdef_ntofz(m, upb_fielddef_name(f)) || 1342 upb_msgdef_itof(m, upb_fielddef_number(f))) { 1343 upb_status_seterrmsg(s, "duplicate field name or number for field"); 1344 return false; 1345 } 1346 return true; 1347 } 1348 1349 static void add_field(upb_msgdef *m, upb_fielddef *f, const void *ref_donor) { 1350 release_containingtype(f); 1351 f->msg.def = m; 1352 f->msg_is_symbolic = false; 1353 upb_inttable_insert(&m->itof, upb_fielddef_number(f), upb_value_ptr(f)); 1354 upb_strtable_insert(&m->ntof, upb_fielddef_name(f), upb_value_ptr(f)); 1355 upb_ref2(f, m); 1356 upb_ref2(m, f); 1357 if (ref_donor) upb_fielddef_unref(f, ref_donor); 1358 } 1359 1360 bool upb_msgdef_addfield(upb_msgdef *m, upb_fielddef *f, const void *ref_donor, 1361 upb_status *s) { 1362 /* TODO: extensions need to have a separate namespace, because proto2 allows a 1363 * top-level extension (ie. one not in any package) to have the same name as a 1364 * field from the message. 1365 * 1366 * This also implies that there needs to be a separate lookup-by-name method 1367 * for extensions. It seems desirable for iteration to return both extensions 1368 * and non-extensions though. 1369 * 1370 * We also need to validate that the field number is in an extension range iff 1371 * it is an extension. 1372 * 1373 * This method is idempotent. Check if |f| is already part of this msgdef and 1374 * return immediately if so. */ 1375 if (upb_fielddef_containingtype(f) == m) { 1376 return true; 1377 } 1378 1379 /* Check constraints for all fields before performing any action. 
*/ 1380 if (!check_field_add(m, f, s)) { 1381 return false; 1382 } else if (upb_fielddef_containingoneof(f) != NULL) { 1383 /* Fields in a oneof can only be added by adding the oneof to the msgdef. */ 1384 upb_status_seterrmsg(s, "fielddef is part of a oneof"); 1385 return false; 1386 } 1387 1388 /* Constraint checks ok, perform the action. */ 1389 add_field(m, f, ref_donor); 1390 return true; 1391 } 1392 1393 bool upb_msgdef_addoneof(upb_msgdef *m, upb_oneofdef *o, const void *ref_donor, 1394 upb_status *s) { 1395 upb_oneof_iter it; 1396 1397 /* Check various conditions that would prevent this oneof from being added. */ 1398 if (upb_oneofdef_containingtype(o)) { 1399 upb_status_seterrmsg(s, "oneofdef already belongs to a message"); 1400 return false; 1401 } else if (upb_oneofdef_name(o) == NULL) { 1402 upb_status_seterrmsg(s, "oneofdef name was not set"); 1403 return false; 1404 } else if (upb_msgdef_ntooz(m, upb_oneofdef_name(o))) { 1405 upb_status_seterrmsg(s, "duplicate oneof name"); 1406 return false; 1407 } 1408 1409 /* Check that all of the oneof's fields do not conflict with names or numbers 1410 * of fields already in the message. */ 1411 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { 1412 const upb_fielddef *f = upb_oneof_iter_field(&it); 1413 if (!check_field_add(m, f, s)) { 1414 return false; 1415 } 1416 } 1417 1418 /* Everything checks out -- commit now. */ 1419 1420 /* Add oneof itself first. */ 1421 o->parent = m; 1422 upb_strtable_insert(&m->ntoo, upb_oneofdef_name(o), upb_value_ptr(o)); 1423 upb_ref2(o, m); 1424 upb_ref2(m, o); 1425 1426 /* Add each field of the oneof directly to the msgdef. 
*/ 1427 for (upb_oneof_begin(&it, o); !upb_oneof_done(&it); upb_oneof_next(&it)) { 1428 upb_fielddef *f = upb_oneof_iter_field(&it); 1429 add_field(m, f, NULL); 1430 } 1431 1432 if (ref_donor) upb_oneofdef_unref(o, ref_donor); 1433 1434 return true; 1435 } 1436 1437 const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) { 1438 upb_value val; 1439 return upb_inttable_lookup32(&m->itof, i, &val) ? 1440 upb_value_getptr(val) : NULL; 1441 } 1442 1443 const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, 1444 size_t len) { 1445 upb_value val; 1446 return upb_strtable_lookup2(&m->ntof, name, len, &val) ? 1447 upb_value_getptr(val) : NULL; 1448 } 1449 1450 const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, 1451 size_t len) { 1452 upb_value val; 1453 return upb_strtable_lookup2(&m->ntoo, name, len, &val) ? 1454 upb_value_getptr(val) : NULL; 1455 } 1456 1457 int upb_msgdef_numfields(const upb_msgdef *m) { 1458 return upb_strtable_count(&m->ntof); 1459 } 1460 1461 int upb_msgdef_numoneofs(const upb_msgdef *m) { 1462 return upb_strtable_count(&m->ntoo); 1463 } 1464 1465 void upb_msgdef_setmapentry(upb_msgdef *m, bool map_entry) { 1466 assert(!upb_msgdef_isfrozen(m)); 1467 m->map_entry = map_entry; 1468 } 1469 1470 bool upb_msgdef_mapentry(const upb_msgdef *m) { 1471 return m->map_entry; 1472 } 1473 1474 void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { 1475 upb_inttable_begin(iter, &m->itof); 1476 } 1477 1478 void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } 1479 1480 bool upb_msg_field_done(const upb_msg_field_iter *iter) { 1481 return upb_inttable_done(iter); 1482 } 1483 1484 upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { 1485 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); 1486 } 1487 1488 void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { 1489 upb_inttable_iter_setdone(iter); 1490 } 1491 1492 void 
upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { 1493 upb_strtable_begin(iter, &m->ntoo); 1494 } 1495 1496 void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { upb_strtable_next(iter); } 1497 1498 bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { 1499 return upb_strtable_done(iter); 1500 } 1501 1502 upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { 1503 return (upb_oneofdef*)upb_value_getptr(upb_strtable_iter_value(iter)); 1504 } 1505 1506 void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { 1507 upb_strtable_iter_setdone(iter); 1508 } 1509 1510 /* upb_oneofdef ***************************************************************/ 1511 1512 static void visitoneof(const upb_refcounted *r, upb_refcounted_visit *visit, 1513 void *closure) { 1514 const upb_oneofdef *o = (const upb_oneofdef*)r; 1515 upb_oneof_iter i; 1516 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { 1517 const upb_fielddef *f = upb_oneof_iter_field(&i); 1518 visit(r, upb_fielddef_upcast2(f), closure); 1519 } 1520 if (o->parent) { 1521 visit(r, upb_msgdef_upcast2(o->parent), closure); 1522 } 1523 } 1524 1525 static void freeoneof(upb_refcounted *r) { 1526 upb_oneofdef *o = (upb_oneofdef*)r; 1527 upb_strtable_uninit(&o->ntof); 1528 upb_inttable_uninit(&o->itof); 1529 upb_def_uninit(upb_oneofdef_upcast_mutable(o)); 1530 free(o); 1531 } 1532 1533 upb_oneofdef *upb_oneofdef_new(const void *owner) { 1534 static const struct upb_refcounted_vtbl vtbl = {visitoneof, freeoneof}; 1535 upb_oneofdef *o = malloc(sizeof(*o)); 1536 o->parent = NULL; 1537 if (!o) return NULL; 1538 if (!upb_def_init(upb_oneofdef_upcast_mutable(o), UPB_DEF_ONEOF, &vtbl, 1539 owner)) 1540 goto err2; 1541 if (!upb_inttable_init(&o->itof, UPB_CTYPE_PTR)) goto err2; 1542 if (!upb_strtable_init(&o->ntof, UPB_CTYPE_PTR)) goto err1; 1543 return o; 1544 1545 err1: 1546 upb_inttable_uninit(&o->itof); 1547 err2: 1548 free(o); 1549 return NULL; 1550 } 1551 1552 upb_oneofdef 
*upb_oneofdef_dup(const upb_oneofdef *o, const void *owner) { 1553 bool ok; 1554 upb_oneof_iter i; 1555 upb_oneofdef *newo = upb_oneofdef_new(owner); 1556 if (!newo) return NULL; 1557 ok = upb_def_setfullname(upb_oneofdef_upcast_mutable(newo), 1558 upb_def_fullname(upb_oneofdef_upcast(o)), NULL); 1559 UPB_ASSERT_VAR(ok, ok); 1560 for (upb_oneof_begin(&i, o); !upb_oneof_done(&i); upb_oneof_next(&i)) { 1561 upb_fielddef *f = upb_fielddef_dup(upb_oneof_iter_field(&i), &f); 1562 if (!f || !upb_oneofdef_addfield(newo, f, &f, NULL)) { 1563 upb_oneofdef_unref(newo, owner); 1564 return NULL; 1565 } 1566 } 1567 return newo; 1568 } 1569 1570 const char *upb_oneofdef_name(const upb_oneofdef *o) { 1571 return upb_def_fullname(upb_oneofdef_upcast(o)); 1572 } 1573 1574 bool upb_oneofdef_setname(upb_oneofdef *o, const char *fullname, 1575 upb_status *s) { 1576 if (upb_oneofdef_containingtype(o)) { 1577 upb_status_seterrmsg(s, "oneof already added to a message"); 1578 return false; 1579 } 1580 return upb_def_setfullname(upb_oneofdef_upcast_mutable(o), fullname, s); 1581 } 1582 1583 const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { 1584 return o->parent; 1585 } 1586 1587 int upb_oneofdef_numfields(const upb_oneofdef *o) { 1588 return upb_strtable_count(&o->ntof); 1589 } 1590 1591 bool upb_oneofdef_addfield(upb_oneofdef *o, upb_fielddef *f, 1592 const void *ref_donor, 1593 upb_status *s) { 1594 assert(!upb_oneofdef_isfrozen(o)); 1595 assert(!o->parent || !upb_msgdef_isfrozen(o->parent)); 1596 1597 /* This method is idempotent. Check if |f| is already part of this oneofdef 1598 * and return immediately if so. */ 1599 if (upb_fielddef_containingoneof(f) == o) { 1600 return true; 1601 } 1602 1603 /* The field must have an OPTIONAL label. 
*/ 1604 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { 1605 upb_status_seterrmsg(s, "fields in oneof must have OPTIONAL label"); 1606 return false; 1607 } 1608 1609 /* Check that no field with this name or number exists already in the oneof. 1610 * Also check that the field is not already part of a oneof. */ 1611 if (upb_fielddef_name(f) == NULL || upb_fielddef_number(f) == 0) { 1612 upb_status_seterrmsg(s, "field name or number were not set"); 1613 return false; 1614 } else if (upb_oneofdef_itof(o, upb_fielddef_number(f)) || 1615 upb_oneofdef_ntofz(o, upb_fielddef_name(f))) { 1616 upb_status_seterrmsg(s, "duplicate field name or number"); 1617 return false; 1618 } else if (upb_fielddef_containingoneof(f) != NULL) { 1619 upb_status_seterrmsg(s, "fielddef already belongs to a oneof"); 1620 return false; 1621 } 1622 1623 /* We allow adding a field to the oneof either if the field is not part of a 1624 * msgdef, or if it is and we are also part of the same msgdef. */ 1625 if (o->parent == NULL) { 1626 /* If we're not in a msgdef, the field cannot be either. Otherwise we would 1627 * need to magically add this oneof to a msgdef to remain consistent, which 1628 * is surprising behavior. */ 1629 if (upb_fielddef_containingtype(f) != NULL) { 1630 upb_status_seterrmsg(s, "fielddef already belongs to a message, but " 1631 "oneof does not"); 1632 return false; 1633 } 1634 } else { 1635 /* If we're in a msgdef, the user can add fields that either aren't in any 1636 * msgdef (in which case they're added to our msgdef) or already a part of 1637 * our msgdef. */ 1638 if (upb_fielddef_containingtype(f) != NULL && 1639 upb_fielddef_containingtype(f) != o->parent) { 1640 upb_status_seterrmsg(s, "fielddef belongs to a different message " 1641 "than oneof"); 1642 return false; 1643 } 1644 } 1645 1646 /* Commit phase. First add the field to our parent msgdef, if any, because 1647 * that may fail; then add the field to our own tables. 
*/ 1648 1649 if (o->parent != NULL && upb_fielddef_containingtype(f) == NULL) { 1650 if (!upb_msgdef_addfield((upb_msgdef*)o->parent, f, NULL, s)) { 1651 return false; 1652 } 1653 } 1654 1655 release_containingtype(f); 1656 f->oneof = o; 1657 upb_inttable_insert(&o->itof, upb_fielddef_number(f), upb_value_ptr(f)); 1658 upb_strtable_insert(&o->ntof, upb_fielddef_name(f), upb_value_ptr(f)); 1659 upb_ref2(f, o); 1660 upb_ref2(o, f); 1661 if (ref_donor) upb_fielddef_unref(f, ref_donor); 1662 1663 return true; 1664 } 1665 1666 const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, 1667 const char *name, size_t length) { 1668 upb_value val; 1669 return upb_strtable_lookup2(&o->ntof, name, length, &val) ? 1670 upb_value_getptr(val) : NULL; 1671 } 1672 1673 const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { 1674 upb_value val; 1675 return upb_inttable_lookup32(&o->itof, num, &val) ? 1676 upb_value_getptr(val) : NULL; 1677 } 1678 1679 void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) { 1680 upb_inttable_begin(iter, &o->itof); 1681 } 1682 1683 void upb_oneof_next(upb_oneof_iter *iter) { 1684 upb_inttable_next(iter); 1685 } 1686 1687 bool upb_oneof_done(upb_oneof_iter *iter) { 1688 return upb_inttable_done(iter); 1689 } 1690 1691 upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) { 1692 return (upb_fielddef*)upb_value_getptr(upb_inttable_iter_value(iter)); 1693 } 1694 1695 void upb_oneof_iter_setdone(upb_oneof_iter *iter) { 1696 upb_inttable_iter_setdone(iter); 1697 } 1698 1699 1700 #include <stdlib.h> 1701 #include <stdio.h> 1702 #include <string.h> 1703 1704 typedef struct cleanup_ent { 1705 upb_cleanup_func *cleanup; 1706 void *ud; 1707 struct cleanup_ent *next; 1708 } cleanup_ent; 1709 1710 static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, size_t size); 1711 1712 /* Default allocator **********************************************************/ 1713 1714 /* Just use realloc, keeping all allocated 
blocks in a linked list to destroy at 1715 * the end. */ 1716 1717 typedef struct mem_block { 1718 /* List is doubly-linked, because in cases where realloc() moves an existing 1719 * block, we need to be able to remove the old pointer from the list 1720 * efficiently. */ 1721 struct mem_block *prev, *next; 1722 #ifndef NDEBUG 1723 size_t size; /* Doesn't include mem_block structure. */ 1724 #endif 1725 } mem_block; 1726 1727 typedef struct { 1728 mem_block *head; 1729 } default_alloc_ud; 1730 1731 static void *default_alloc(void *_ud, void *ptr, size_t oldsize, size_t size) { 1732 default_alloc_ud *ud = _ud; 1733 mem_block *from, *block; 1734 void *ret; 1735 UPB_UNUSED(oldsize); 1736 1737 from = ptr ? (void*)((char*)ptr - sizeof(mem_block)) : NULL; 1738 1739 #ifndef NDEBUG 1740 if (from) { 1741 assert(oldsize <= from->size); 1742 } 1743 #endif 1744 1745 /* TODO(haberman): we probably need to provide even better alignment here, 1746 * like 16-byte alignment of the returned data pointer. */ 1747 block = realloc(from, size + sizeof(mem_block)); 1748 if (!block) return NULL; 1749 ret = (char*)block + sizeof(*block); 1750 1751 #ifndef NDEBUG 1752 block->size = size; 1753 #endif 1754 1755 if (from) { 1756 if (block != from) { 1757 /* The block was moved, so pointers in next and prev blocks must be 1758 * updated to its new location. */ 1759 if (block->next) block->next->prev = block; 1760 if (block->prev) block->prev->next = block; 1761 if (ud->head == from) ud->head = block; 1762 } 1763 } else { 1764 /* Insert at head of linked list. 
*/ 1765 block->prev = NULL; 1766 block->next = ud->head; 1767 if (block->next) block->next->prev = block; 1768 ud->head = block; 1769 } 1770 1771 return ret; 1772 } 1773 1774 static void default_alloc_cleanup(void *_ud) { 1775 default_alloc_ud *ud = _ud; 1776 mem_block *block = ud->head; 1777 1778 while (block) { 1779 void *to_free = block; 1780 block = block->next; 1781 free(to_free); 1782 } 1783 } 1784 1785 1786 /* Standard error functions ***************************************************/ 1787 1788 static bool default_err(void *ud, const upb_status *status) { 1789 UPB_UNUSED(ud); 1790 UPB_UNUSED(status); 1791 return false; 1792 } 1793 1794 static bool write_err_to(void *ud, const upb_status *status) { 1795 upb_status *copy_to = ud; 1796 upb_status_copy(copy_to, status); 1797 return false; 1798 } 1799 1800 1801 /* upb_env ********************************************************************/ 1802 1803 void upb_env_init(upb_env *e) { 1804 default_alloc_ud *ud = (default_alloc_ud*)&e->default_alloc_ud; 1805 e->ok_ = true; 1806 e->bytes_allocated = 0; 1807 e->cleanup_head = NULL; 1808 1809 ud->head = NULL; 1810 1811 /* Set default functions. */ 1812 upb_env_setallocfunc(e, default_alloc, ud); 1813 upb_env_seterrorfunc(e, default_err, NULL); 1814 } 1815 1816 void upb_env_uninit(upb_env *e) { 1817 cleanup_ent *ent = e->cleanup_head; 1818 1819 while (ent) { 1820 ent->cleanup(ent->ud); 1821 ent = ent->next; 1822 } 1823 1824 /* Must do this after running cleanup functions, because this will delete 1825 the memory we store our cleanup entries in! 
*/ 1826 if (e->alloc == default_alloc) { 1827 default_alloc_cleanup(e->alloc_ud); 1828 } 1829 } 1830 1831 UPB_FORCEINLINE void upb_env_setallocfunc(upb_env *e, upb_alloc_func *alloc, 1832 void *ud) { 1833 e->alloc = alloc; 1834 e->alloc_ud = ud; 1835 } 1836 1837 UPB_FORCEINLINE void upb_env_seterrorfunc(upb_env *e, upb_error_func *func, 1838 void *ud) { 1839 e->err = func; 1840 e->err_ud = ud; 1841 } 1842 1843 void upb_env_reporterrorsto(upb_env *e, upb_status *status) { 1844 e->err = write_err_to; 1845 e->err_ud = status; 1846 } 1847 1848 bool upb_env_ok(const upb_env *e) { 1849 return e->ok_; 1850 } 1851 1852 bool upb_env_reporterror(upb_env *e, const upb_status *status) { 1853 e->ok_ = false; 1854 return e->err(e->err_ud, status); 1855 } 1856 1857 bool upb_env_addcleanup(upb_env *e, upb_cleanup_func *func, void *ud) { 1858 cleanup_ent *ent = upb_env_malloc(e, sizeof(cleanup_ent)); 1859 if (!ent) return false; 1860 1861 ent->cleanup = func; 1862 ent->ud = ud; 1863 ent->next = e->cleanup_head; 1864 e->cleanup_head = ent; 1865 1866 return true; 1867 } 1868 1869 void *upb_env_malloc(upb_env *e, size_t size) { 1870 e->bytes_allocated += size; 1871 if (e->alloc == seeded_alloc) { 1872 /* This is equivalent to the next branch, but allows inlining for a 1873 * measurable perf benefit. */ 1874 return seeded_alloc(e->alloc_ud, NULL, 0, size); 1875 } else { 1876 return e->alloc(e->alloc_ud, NULL, 0, size); 1877 } 1878 } 1879 1880 void *upb_env_realloc(upb_env *e, void *ptr, size_t oldsize, size_t size) { 1881 char *ret; 1882 assert(oldsize <= size); 1883 ret = e->alloc(e->alloc_ud, ptr, oldsize, size); 1884 1885 #ifndef NDEBUG 1886 /* Overwrite non-preserved memory to ensure callers are passing the oldsize 1887 * that they truly require. 
*/ 1888 memset(ret + oldsize, 0xff, size - oldsize); 1889 #endif 1890 1891 return ret; 1892 } 1893 1894 size_t upb_env_bytesallocated(const upb_env *e) { 1895 return e->bytes_allocated; 1896 } 1897 1898 1899 /* upb_seededalloc ************************************************************/ 1900 1901 /* Be conservative and choose 16 in case anyone is using SSE. */ 1902 static const size_t maxalign = 16; 1903 1904 static size_t align_up(size_t size) { 1905 return ((size + maxalign - 1) / maxalign) * maxalign; 1906 } 1907 1908 UPB_FORCEINLINE static void *seeded_alloc(void *ud, void *ptr, size_t oldsize, 1909 size_t size) { 1910 upb_seededalloc *a = ud; 1911 1912 size = align_up(size); 1913 1914 assert(a->mem_limit >= a->mem_ptr); 1915 1916 if (oldsize == 0 && size <= (size_t)(a->mem_limit - a->mem_ptr)) { 1917 /* Fast path: we can satisfy from the initial allocation. */ 1918 void *ret = a->mem_ptr; 1919 a->mem_ptr += size; 1920 return ret; 1921 } else { 1922 char *chptr = ptr; 1923 /* Slow path: fallback to other allocator. */ 1924 a->need_cleanup = true; 1925 /* Is `ptr` part of the user-provided initial block? Don't pass it to the 1926 * default allocator if so; otherwise, it may try to realloc() the block. 
*/ 1927 if (chptr >= a->mem_base && chptr < a->mem_limit) { 1928 void *ret; 1929 assert(chptr + oldsize <= a->mem_limit); 1930 ret = a->alloc(a->alloc_ud, NULL, 0, size); 1931 if (ret) memcpy(ret, ptr, oldsize); 1932 return ret; 1933 } else { 1934 return a->alloc(a->alloc_ud, ptr, oldsize, size); 1935 } 1936 } 1937 } 1938 1939 void upb_seededalloc_init(upb_seededalloc *a, void *mem, size_t len) { 1940 default_alloc_ud *ud = (default_alloc_ud*)&a->default_alloc_ud; 1941 a->mem_base = mem; 1942 a->mem_ptr = mem; 1943 a->mem_limit = (char*)mem + len; 1944 a->need_cleanup = false; 1945 a->returned_allocfunc = false; 1946 1947 ud->head = NULL; 1948 1949 upb_seededalloc_setfallbackalloc(a, default_alloc, ud); 1950 } 1951 1952 void upb_seededalloc_uninit(upb_seededalloc *a) { 1953 if (a->alloc == default_alloc && a->need_cleanup) { 1954 default_alloc_cleanup(a->alloc_ud); 1955 } 1956 } 1957 1958 UPB_FORCEINLINE void upb_seededalloc_setfallbackalloc(upb_seededalloc *a, 1959 upb_alloc_func *alloc, 1960 void *ud) { 1961 assert(!a->returned_allocfunc); 1962 a->alloc = alloc; 1963 a->alloc_ud = ud; 1964 } 1965 1966 upb_alloc_func *upb_seededalloc_getallocfunc(upb_seededalloc *a) { 1967 a->returned_allocfunc = true; 1968 return seeded_alloc; 1969 } 1970 /* 1971 ** TODO(haberman): it's unclear whether a lot of the consistency checks should 1972 ** assert() or return false. 1973 */ 1974 1975 1976 #include <stdlib.h> 1977 #include <string.h> 1978 1979 1980 1981 /* Defined for the sole purpose of having a unique pointer value for 1982 * UPB_NO_CLOSURE. 
*/ 1983 char _upb_noclosure; 1984 1985 static void freehandlers(upb_refcounted *r) { 1986 upb_handlers *h = (upb_handlers*)r; 1987 1988 upb_inttable_iter i; 1989 upb_inttable_begin(&i, &h->cleanup_); 1990 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 1991 void *val = (void*)upb_inttable_iter_key(&i); 1992 upb_value func_val = upb_inttable_iter_value(&i); 1993 upb_handlerfree *func = upb_value_getfptr(func_val); 1994 func(val); 1995 } 1996 1997 upb_inttable_uninit(&h->cleanup_); 1998 upb_msgdef_unref(h->msg, h); 1999 free(h->sub); 2000 free(h); 2001 } 2002 2003 static void visithandlers(const upb_refcounted *r, upb_refcounted_visit *visit, 2004 void *closure) { 2005 const upb_handlers *h = (const upb_handlers*)r; 2006 upb_msg_field_iter i; 2007 for(upb_msg_field_begin(&i, h->msg); 2008 !upb_msg_field_done(&i); 2009 upb_msg_field_next(&i)) { 2010 upb_fielddef *f = upb_msg_iter_field(&i); 2011 const upb_handlers *sub; 2012 if (!upb_fielddef_issubmsg(f)) continue; 2013 sub = upb_handlers_getsubhandlers(h, f); 2014 if (sub) visit(r, upb_handlers_upcast(sub), closure); 2015 } 2016 } 2017 2018 static const struct upb_refcounted_vtbl vtbl = {visithandlers, freehandlers}; 2019 2020 typedef struct { 2021 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ 2022 upb_handlers_callback *callback; 2023 const void *closure; 2024 } dfs_state; 2025 2026 /* TODO(haberman): discard upb_handlers* objects that do not actually have any 2027 * handlers set and cannot reach any upb_handlers* object that does. This is 2028 * slightly tricky to do correctly. */ 2029 static upb_handlers *newformsg(const upb_msgdef *m, const void *owner, 2030 dfs_state *s) { 2031 upb_msg_field_iter i; 2032 upb_handlers *h = upb_handlers_new(m, owner); 2033 if (!h) return NULL; 2034 if (!upb_inttable_insertptr(&s->tab, m, upb_value_ptr(h))) goto oom; 2035 2036 s->callback(s->closure, h); 2037 2038 /* For each submessage field, get or create a handlers object and set it as 2039 * the subhandlers. 
*/ 2040 for(upb_msg_field_begin(&i, m); 2041 !upb_msg_field_done(&i); 2042 upb_msg_field_next(&i)) { 2043 upb_fielddef *f = upb_msg_iter_field(&i); 2044 const upb_msgdef *subdef; 2045 upb_value subm_ent; 2046 2047 if (!upb_fielddef_issubmsg(f)) continue; 2048 2049 subdef = upb_downcast_msgdef(upb_fielddef_subdef(f)); 2050 if (upb_inttable_lookupptr(&s->tab, subdef, &subm_ent)) { 2051 upb_handlers_setsubhandlers(h, f, upb_value_getptr(subm_ent)); 2052 } else { 2053 upb_handlers *sub_mh = newformsg(subdef, &sub_mh, s); 2054 if (!sub_mh) goto oom; 2055 upb_handlers_setsubhandlers(h, f, sub_mh); 2056 upb_handlers_unref(sub_mh, &sub_mh); 2057 } 2058 } 2059 return h; 2060 2061 oom: 2062 upb_handlers_unref(h, owner); 2063 return NULL; 2064 } 2065 2066 /* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the 2067 * subhandlers for this submessage field. */ 2068 #define SUBH(h, selector) (h->sub[selector]) 2069 2070 /* The selector for a submessage field is the field index. */ 2071 #define SUBH_F(h, f) SUBH(h, f->index_) 2072 2073 static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, 2074 upb_handlertype_t type) { 2075 upb_selector_t sel; 2076 assert(!upb_handlers_isfrozen(h)); 2077 if (upb_handlers_msgdef(h) != upb_fielddef_containingtype(f)) { 2078 upb_status_seterrf( 2079 &h->status_, "type mismatch: field %s does not belong to message %s", 2080 upb_fielddef_name(f), upb_msgdef_fullname(upb_handlers_msgdef(h))); 2081 return -1; 2082 } 2083 if (!upb_handlers_getselector(f, type, &sel)) { 2084 upb_status_seterrf( 2085 &h->status_, 2086 "type mismatch: cannot register handler type %d for field %s", 2087 type, upb_fielddef_name(f)); 2088 return -1; 2089 } 2090 return sel; 2091 } 2092 2093 static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, 2094 upb_handlertype_t type) { 2095 int32_t sel = trygetsel(h, f, type); 2096 assert(sel >= 0); 2097 return sel; 2098 } 2099 2100 static const void **returntype(upb_handlers *h, const 
upb_fielddef *f, 2101 upb_handlertype_t type) { 2102 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type_; 2103 } 2104 2105 static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, 2106 upb_handlertype_t type, upb_func *func, 2107 upb_handlerattr *attr) { 2108 upb_handlerattr set_attr = UPB_HANDLERATTR_INITIALIZER; 2109 const void *closure_type; 2110 const void **context_closure_type; 2111 2112 assert(!upb_handlers_isfrozen(h)); 2113 2114 if (sel < 0) { 2115 upb_status_seterrmsg(&h->status_, 2116 "incorrect handler type for this field."); 2117 return false; 2118 } 2119 2120 if (h->table[sel].func) { 2121 upb_status_seterrmsg(&h->status_, 2122 "cannot change handler once it has been set."); 2123 return false; 2124 } 2125 2126 if (attr) { 2127 set_attr = *attr; 2128 } 2129 2130 /* Check that the given closure type matches the closure type that has been 2131 * established for this context (if any). */ 2132 closure_type = upb_handlerattr_closuretype(&set_attr); 2133 2134 if (type == UPB_HANDLER_STRING) { 2135 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); 2136 } else if (f && upb_fielddef_isseq(f) && 2137 type != UPB_HANDLER_STARTSEQ && 2138 type != UPB_HANDLER_ENDSEQ) { 2139 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); 2140 } else { 2141 context_closure_type = &h->top_closure_type; 2142 } 2143 2144 if (closure_type && *context_closure_type && 2145 closure_type != *context_closure_type) { 2146 /* TODO(haberman): better message for debugging. 
*/ 2147 if (f) { 2148 upb_status_seterrf(&h->status_, 2149 "closure type does not match for field %s", 2150 upb_fielddef_name(f)); 2151 } else { 2152 upb_status_seterrmsg( 2153 &h->status_, "closure type does not match for message-level handler"); 2154 } 2155 return false; 2156 } 2157 2158 if (closure_type) 2159 *context_closure_type = closure_type; 2160 2161 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer 2162 * matches any pre-existing expectations about what type is expected. */ 2163 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { 2164 const void *return_type = upb_handlerattr_returnclosuretype(&set_attr); 2165 const void *table_return_type = 2166 upb_handlerattr_returnclosuretype(&h->table[sel].attr); 2167 if (return_type && table_return_type && return_type != table_return_type) { 2168 upb_status_seterrmsg(&h->status_, "closure return type does not match"); 2169 return false; 2170 } 2171 2172 if (table_return_type && !return_type) 2173 upb_handlerattr_setreturnclosuretype(&set_attr, table_return_type); 2174 } 2175 2176 h->table[sel].func = (upb_func*)func; 2177 h->table[sel].attr = set_attr; 2178 return true; 2179 } 2180 2181 /* Returns the effective closure type for this handler (which will propagate 2182 * from outer frames if this frame has no START* handler). Not implemented for 2183 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is 2184 * the effective closure type is unspecified (either no handler was registered 2185 * to specify it or the handler that was registered did not specify the closure 2186 * type). 
*/
const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
                                   upb_handlertype_t type) {
  const void *closure_type;
  upb_selector_t start_sel;

  assert(type != UPB_HANDLER_STRING);

  /* Unless an enclosing START* handler says otherwise, the closure is the
   * message-level one. */
  closure_type = h->top_closure_type;

  /* Handlers that run inside a sequence see whatever a registered STARTSEQ
   * handler returns. */
  if (upb_fielddef_isseq(f) &&
      type != UPB_HANDLER_STARTSEQ &&
      type != UPB_HANDLER_ENDSEQ) {
    start_sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ);
    if (h->table[start_sel].func) {
      closure_type =
          upb_handlerattr_returnclosuretype(&h->table[start_sel].attr);
    }
  }

  /* Only reachable when the assertion above is compiled out. */
  if (type == UPB_HANDLER_STRING) {
    start_sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR);
    if (h->table[start_sel].func) {
      closure_type =
          upb_handlerattr_returnclosuretype(&h->table[start_sel].attr);
    }
  }

  /* The effective type of the submessage; not used yet.
   * if (type == SUBMESSAGE &&
   *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
   *   ret = upb_handlerattr_returnclosuretype(&h->table[sel].attr);
   * } */

  return closure_type;
}

/* Checks whether the START* handler specified by f & type is missing even
 * though it is required to convert the established type of an outer frame
 * ("closure_type") into the established type of an inner frame (represented in
 * the return closure type of this handler's attr). 
*/ 2220 bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type, 2221 upb_status *status) { 2222 const void *closure_type; 2223 const upb_handlerattr *attr; 2224 const void *return_closure_type; 2225 2226 upb_selector_t sel = handlers_getsel(h, f, type); 2227 if (h->table[sel].func) return true; 2228 closure_type = effective_closure_type(h, f, type); 2229 attr = &h->table[sel].attr; 2230 return_closure_type = upb_handlerattr_returnclosuretype(attr); 2231 if (closure_type && return_closure_type && 2232 closure_type != return_closure_type) { 2233 upb_status_seterrf(status, 2234 "expected start handler to return sub type for field %f", 2235 upb_fielddef_name(f)); 2236 return false; 2237 } 2238 return true; 2239 } 2240 2241 /* Public interface ***********************************************************/ 2242 2243 upb_handlers *upb_handlers_new(const upb_msgdef *md, const void *owner) { 2244 int extra; 2245 upb_handlers *h; 2246 2247 assert(upb_msgdef_isfrozen(md)); 2248 2249 extra = sizeof(upb_handlers_tabent) * (md->selector_count - 1); 2250 h = calloc(sizeof(*h) + extra, 1); 2251 if (!h) return NULL; 2252 2253 h->msg = md; 2254 upb_msgdef_ref(h->msg, h); 2255 upb_status_clear(&h->status_); 2256 h->sub = calloc(md->submsg_field_count, sizeof(*h->sub)); 2257 if (!h->sub) goto oom; 2258 if (!upb_refcounted_init(upb_handlers_upcast_mutable(h), &vtbl, owner)) 2259 goto oom; 2260 if (!upb_inttable_init(&h->cleanup_, UPB_CTYPE_FPTR)) goto oom; 2261 2262 /* calloc() above initialized all handlers to NULL. 
*/ 2263 return h; 2264 2265 oom: 2266 freehandlers(upb_handlers_upcast_mutable(h)); 2267 return NULL; 2268 } 2269 2270 const upb_handlers *upb_handlers_newfrozen(const upb_msgdef *m, 2271 const void *owner, 2272 upb_handlers_callback *callback, 2273 const void *closure) { 2274 dfs_state state; 2275 upb_handlers *ret; 2276 bool ok; 2277 upb_refcounted *r; 2278 2279 state.callback = callback; 2280 state.closure = closure; 2281 if (!upb_inttable_init(&state.tab, UPB_CTYPE_PTR)) return NULL; 2282 2283 ret = newformsg(m, owner, &state); 2284 2285 upb_inttable_uninit(&state.tab); 2286 if (!ret) return NULL; 2287 2288 r = upb_handlers_upcast_mutable(ret); 2289 ok = upb_refcounted_freeze(&r, 1, NULL, UPB_MAX_HANDLER_DEPTH); 2290 UPB_ASSERT_VAR(ok, ok); 2291 2292 return ret; 2293 } 2294 2295 const upb_status *upb_handlers_status(upb_handlers *h) { 2296 assert(!upb_handlers_isfrozen(h)); 2297 return &h->status_; 2298 } 2299 2300 void upb_handlers_clearerr(upb_handlers *h) { 2301 assert(!upb_handlers_isfrozen(h)); 2302 upb_status_clear(&h->status_); 2303 } 2304 2305 #define SETTER(name, handlerctype, handlertype) \ 2306 bool upb_handlers_set ## name(upb_handlers *h, const upb_fielddef *f, \ 2307 handlerctype func, upb_handlerattr *attr) { \ 2308 int32_t sel = trygetsel(h, f, handlertype); \ 2309 return doset(h, sel, f, handlertype, (upb_func*)func, attr); \ 2310 } 2311 2312 SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) 2313 SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) 2314 SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) 2315 SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) 2316 SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) 2317 SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) 2318 SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) 2319 SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) 2320 SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) 2321 SETTER(endstr, 
upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) 2322 SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) 2323 SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) 2324 SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) 2325 SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) 2326 2327 #undef SETTER 2328 2329 bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, 2330 upb_handlerattr *attr) { 2331 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, 2332 (upb_func *)func, attr); 2333 } 2334 2335 bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, 2336 upb_handlerattr *attr) { 2337 assert(!upb_handlers_isfrozen(h)); 2338 return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, 2339 (upb_func *)func, attr); 2340 } 2341 2342 bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, 2343 const upb_handlers *sub) { 2344 assert(sub); 2345 assert(!upb_handlers_isfrozen(h)); 2346 assert(upb_fielddef_issubmsg(f)); 2347 if (SUBH_F(h, f)) return false; /* Can't reset. */ 2348 if (upb_msgdef_upcast(upb_handlers_msgdef(sub)) != upb_fielddef_subdef(f)) { 2349 return false; 2350 } 2351 SUBH_F(h, f) = sub; 2352 upb_ref2(sub, h); 2353 return true; 2354 } 2355 2356 const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, 2357 const upb_fielddef *f) { 2358 assert(upb_fielddef_issubmsg(f)); 2359 return SUBH_F(h, f); 2360 } 2361 2362 bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, 2363 upb_handlerattr *attr) { 2364 if (!upb_handlers_gethandler(h, sel)) 2365 return false; 2366 *attr = h->table[sel].attr; 2367 return true; 2368 } 2369 2370 const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, 2371 upb_selector_t sel) { 2372 /* STARTSUBMSG selector in sel is the field's selector base. 
*/ 2373 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); 2374 } 2375 2376 const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } 2377 2378 bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { 2379 bool ok; 2380 if (upb_inttable_lookupptr(&h->cleanup_, p, NULL)) { 2381 return false; 2382 } 2383 ok = upb_inttable_insertptr(&h->cleanup_, p, upb_value_fptr(func)); 2384 UPB_ASSERT_VAR(ok, ok); 2385 return true; 2386 } 2387 2388 2389 /* "Static" methods ***********************************************************/ 2390 2391 bool upb_handlers_freeze(upb_handlers *const*handlers, int n, upb_status *s) { 2392 /* TODO: verify we have a transitive closure. */ 2393 int i; 2394 for (i = 0; i < n; i++) { 2395 upb_msg_field_iter j; 2396 upb_handlers *h = handlers[i]; 2397 2398 if (!upb_ok(&h->status_)) { 2399 upb_status_seterrf(s, "handlers for message %s had error status: %s", 2400 upb_msgdef_fullname(upb_handlers_msgdef(h)), 2401 upb_status_errmsg(&h->status_)); 2402 return false; 2403 } 2404 2405 /* Check that there are no closure mismatches due to missing Start* handlers 2406 * or subhandlers with different type-level types. 
*/ 2407 for(upb_msg_field_begin(&j, h->msg); 2408 !upb_msg_field_done(&j); 2409 upb_msg_field_next(&j)) { 2410 2411 const upb_fielddef *f = upb_msg_iter_field(&j); 2412 if (upb_fielddef_isseq(f)) { 2413 if (!checkstart(h, f, UPB_HANDLER_STARTSEQ, s)) 2414 return false; 2415 } 2416 2417 if (upb_fielddef_isstring(f)) { 2418 if (!checkstart(h, f, UPB_HANDLER_STARTSTR, s)) 2419 return false; 2420 } 2421 2422 if (upb_fielddef_issubmsg(f)) { 2423 bool hashandler = false; 2424 if (upb_handlers_gethandler( 2425 h, handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)) || 2426 upb_handlers_gethandler( 2427 h, handlers_getsel(h, f, UPB_HANDLER_ENDSUBMSG))) { 2428 hashandler = true; 2429 } 2430 2431 if (upb_fielddef_isseq(f) && 2432 (upb_handlers_gethandler( 2433 h, handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)) || 2434 upb_handlers_gethandler( 2435 h, handlers_getsel(h, f, UPB_HANDLER_ENDSEQ)))) { 2436 hashandler = true; 2437 } 2438 2439 if (hashandler && !upb_handlers_getsubhandlers(h, f)) { 2440 /* For now we add an empty subhandlers in this case. It makes the 2441 * decoder code generator simpler, because it only has to handle two 2442 * cases (submessage has handlers or not) as opposed to three 2443 * (submessage has handlers in enclosing message but no subhandlers). 2444 * 2445 * This makes parsing less efficient in the case that we want to 2446 * notice a submessage but skip its contents (like if we're testing 2447 * for submessage presence or counting the number of repeated 2448 * submessages). In this case we will end up parsing the submessage 2449 * field by field and throwing away the results for each, instead of 2450 * skipping the whole delimited thing at once. If this is an issue we 2451 * can revisit it, but do remember that this only arises when you have 2452 * handlers (startseq/startsubmsg/endsubmsg/endseq) set for the 2453 * submessage but no subhandlers. The uses cases for this are 2454 * limited. 
*/ 2455 upb_handlers *sub = upb_handlers_new(upb_fielddef_msgsubdef(f), &sub); 2456 upb_handlers_setsubhandlers(h, f, sub); 2457 upb_handlers_unref(sub, &sub); 2458 } 2459 2460 /* TODO(haberman): check type of submessage. 2461 * This is slightly tricky; also consider whether we should check that 2462 * they match at setsubhandlers time. */ 2463 } 2464 } 2465 } 2466 2467 if (!upb_refcounted_freeze((upb_refcounted*const*)handlers, n, s, 2468 UPB_MAX_HANDLER_DEPTH)) { 2469 return false; 2470 } 2471 2472 return true; 2473 } 2474 2475 upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { 2476 switch (upb_fielddef_type(f)) { 2477 case UPB_TYPE_INT32: 2478 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; 2479 case UPB_TYPE_INT64: return UPB_HANDLER_INT64; 2480 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; 2481 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; 2482 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; 2483 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; 2484 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; 2485 default: assert(false); return -1; /* Invalid input. 
*/ 2486 } 2487 } 2488 2489 bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, 2490 upb_selector_t *s) { 2491 switch (type) { 2492 case UPB_HANDLER_INT32: 2493 case UPB_HANDLER_INT64: 2494 case UPB_HANDLER_UINT32: 2495 case UPB_HANDLER_UINT64: 2496 case UPB_HANDLER_FLOAT: 2497 case UPB_HANDLER_DOUBLE: 2498 case UPB_HANDLER_BOOL: 2499 if (!upb_fielddef_isprimitive(f) || 2500 upb_handlers_getprimitivehandlertype(f) != type) 2501 return false; 2502 *s = f->selector_base; 2503 break; 2504 case UPB_HANDLER_STRING: 2505 if (upb_fielddef_isstring(f)) { 2506 *s = f->selector_base; 2507 } else if (upb_fielddef_lazy(f)) { 2508 *s = f->selector_base + 3; 2509 } else { 2510 return false; 2511 } 2512 break; 2513 case UPB_HANDLER_STARTSTR: 2514 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { 2515 *s = f->selector_base + 1; 2516 } else { 2517 return false; 2518 } 2519 break; 2520 case UPB_HANDLER_ENDSTR: 2521 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { 2522 *s = f->selector_base + 2; 2523 } else { 2524 return false; 2525 } 2526 break; 2527 case UPB_HANDLER_STARTSEQ: 2528 if (!upb_fielddef_isseq(f)) return false; 2529 *s = f->selector_base - 2; 2530 break; 2531 case UPB_HANDLER_ENDSEQ: 2532 if (!upb_fielddef_isseq(f)) return false; 2533 *s = f->selector_base - 1; 2534 break; 2535 case UPB_HANDLER_STARTSUBMSG: 2536 if (!upb_fielddef_issubmsg(f)) return false; 2537 /* Selectors for STARTSUBMSG are at the beginning of the table so that the 2538 * selector can also be used as an index into the "sub" array of 2539 * subhandlers. The indexes for the two into these two tables are the 2540 * same, except that in the handler table the static selectors come first. 
*/ 2541 *s = f->index_ + UPB_STATIC_SELECTOR_COUNT; 2542 break; 2543 case UPB_HANDLER_ENDSUBMSG: 2544 if (!upb_fielddef_issubmsg(f)) return false; 2545 *s = f->selector_base; 2546 break; 2547 } 2548 assert((size_t)*s < upb_fielddef_containingtype(f)->selector_count); 2549 return true; 2550 } 2551 2552 uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { 2553 return upb_fielddef_isseq(f) ? 2 : 0; 2554 } 2555 2556 uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { 2557 uint32_t ret = 1; 2558 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ 2559 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ 2560 if (upb_fielddef_issubmsg(f)) { 2561 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ 2562 ret += 0; 2563 if (upb_fielddef_lazy(f)) { 2564 /* STARTSTR/ENDSTR/STRING (for lazy) */ 2565 ret += 3; 2566 } 2567 } 2568 return ret; 2569 } 2570 2571 2572 /* upb_handlerattr ************************************************************/ 2573 2574 void upb_handlerattr_init(upb_handlerattr *attr) { 2575 upb_handlerattr from = UPB_HANDLERATTR_INITIALIZER; 2576 memcpy(attr, &from, sizeof(*attr)); 2577 } 2578 2579 void upb_handlerattr_uninit(upb_handlerattr *attr) { 2580 UPB_UNUSED(attr); 2581 } 2582 2583 bool upb_handlerattr_sethandlerdata(upb_handlerattr *attr, const void *hd) { 2584 attr->handler_data_ = hd; 2585 return true; 2586 } 2587 2588 bool upb_handlerattr_setclosuretype(upb_handlerattr *attr, const void *type) { 2589 attr->closure_type_ = type; 2590 return true; 2591 } 2592 2593 const void *upb_handlerattr_closuretype(const upb_handlerattr *attr) { 2594 return attr->closure_type_; 2595 } 2596 2597 bool upb_handlerattr_setreturnclosuretype(upb_handlerattr *attr, 2598 const void *type) { 2599 attr->return_closure_type_ = type; 2600 return true; 2601 } 2602 2603 const void *upb_handlerattr_returnclosuretype(const upb_handlerattr *attr) { 2604 return attr->return_closure_type_; 2605 } 2606 2607 bool 
upb_handlerattr_setalwaysok(upb_handlerattr *attr, bool alwaysok) { 2608 attr->alwaysok_ = alwaysok; 2609 return true; 2610 } 2611 2612 bool upb_handlerattr_alwaysok(const upb_handlerattr *attr) { 2613 return attr->alwaysok_; 2614 } 2615 2616 /* upb_bufhandle **************************************************************/ 2617 2618 size_t upb_bufhandle_objofs(const upb_bufhandle *h) { 2619 return h->objofs_; 2620 } 2621 2622 /* upb_byteshandler ***********************************************************/ 2623 2624 void upb_byteshandler_init(upb_byteshandler* h) { 2625 memset(h, 0, sizeof(*h)); 2626 } 2627 2628 /* For when we support handlerfree callbacks. */ 2629 void upb_byteshandler_uninit(upb_byteshandler* h) { 2630 UPB_UNUSED(h); 2631 } 2632 2633 bool upb_byteshandler_setstartstr(upb_byteshandler *h, 2634 upb_startstr_handlerfunc *func, void *d) { 2635 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; 2636 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data_ = d; 2637 return true; 2638 } 2639 2640 bool upb_byteshandler_setstring(upb_byteshandler *h, 2641 upb_string_handlerfunc *func, void *d) { 2642 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; 2643 h->table[UPB_STRING_SELECTOR].attr.handler_data_ = d; 2644 return true; 2645 } 2646 2647 bool upb_byteshandler_setendstr(upb_byteshandler *h, 2648 upb_endfield_handlerfunc *func, void *d) { 2649 h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func; 2650 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data_ = d; 2651 return true; 2652 } 2653 /* 2654 ** upb::RefCounted Implementation 2655 ** 2656 ** Our key invariants are: 2657 ** 1. reference cycles never span groups 2658 ** 2. for ref2(to, from), we increment to's count iff group(from) != group(to) 2659 ** 2660 ** The previous two are how we avoid leaking cycles. Other important 2661 ** invariants are: 2662 ** 3. for mutable objects "from" and "to", if there exists a ref2(to, from) 2663 ** this implies group(from) == group(to). 
(In practice, what we implement 2664 ** is even stronger; "from" and "to" will share a group if there has *ever* 2665 ** been a ref2(to, from), but all that is necessary for correctness is the 2666 ** weaker one). 2667 ** 4. mutable and immutable objects are never in the same group. 2668 */ 2669 2670 2671 #include <setjmp.h> 2672 #include <stdlib.h> 2673 2674 static void freeobj(upb_refcounted *o); 2675 2676 const char untracked_val; 2677 const void *UPB_UNTRACKED_REF = &untracked_val; 2678 2679 /* arch-specific atomic primitives *******************************************/ 2680 2681 #ifdef UPB_THREAD_UNSAFE /*---------------------------------------------------*/ 2682 2683 static void atomic_inc(uint32_t *a) { (*a)++; } 2684 static bool atomic_dec(uint32_t *a) { return --(*a) == 0; } 2685 2686 #elif defined(__GNUC__) || defined(__clang__) /*------------------------------*/ 2687 2688 static void atomic_inc(uint32_t *a) { __sync_fetch_and_add(a, 1); } 2689 static bool atomic_dec(uint32_t *a) { return __sync_sub_and_fetch(a, 1) == 0; } 2690 2691 #elif defined(WIN32) /*-------------------------------------------------------*/ 2692 2693 #include <Windows.h> 2694 2695 static void atomic_inc(upb_atomic_t *a) { InterlockedIncrement(&a->val); } 2696 static bool atomic_dec(upb_atomic_t *a) { 2697 return InterlockedDecrement(&a->val) == 0; 2698 } 2699 2700 #else 2701 #error Atomic primitives not defined for your platform/CPU. \ 2702 Implement them or compile with UPB_THREAD_UNSAFE. 2703 #endif 2704 2705 /* All static objects point to this refcount. 2706 * It is special-cased in ref/unref below. */ 2707 uint32_t static_refcount = -1; 2708 2709 /* We can avoid atomic ops for statically-declared objects. 2710 * This is a minor optimization but nice since we can avoid degrading under 2711 * contention in this case. 
*/ 2712 2713 static void refgroup(uint32_t *group) { 2714 if (group != &static_refcount) 2715 atomic_inc(group); 2716 } 2717 2718 static bool unrefgroup(uint32_t *group) { 2719 if (group == &static_refcount) { 2720 return false; 2721 } else { 2722 return atomic_dec(group); 2723 } 2724 } 2725 2726 2727 /* Reference tracking (debug only) ********************************************/ 2728 2729 #ifdef UPB_DEBUG_REFS 2730 2731 #ifdef UPB_THREAD_UNSAFE 2732 2733 static void upb_lock() {} 2734 static void upb_unlock() {} 2735 2736 #else 2737 2738 /* User must define functions that lock/unlock a global mutex and link this 2739 * file against them. */ 2740 void upb_lock(); 2741 void upb_unlock(); 2742 2743 #endif 2744 2745 /* UPB_DEBUG_REFS mode counts on being able to malloc() memory in some 2746 * code-paths that can normally never fail, like upb_refcounted_ref(). Since 2747 * we have no way to propagage out-of-memory errors back to the user, and since 2748 * these errors can only occur in UPB_DEBUG_REFS mode, we immediately fail. */ 2749 #define CHECK_OOM(predicate) if (!(predicate)) { assert(predicate); exit(1); } 2750 2751 typedef struct { 2752 int count; /* How many refs there are (duplicates only allowed for ref2). */ 2753 bool is_ref2; 2754 } trackedref; 2755 2756 static trackedref *trackedref_new(bool is_ref2) { 2757 trackedref *ret = malloc(sizeof(*ret)); 2758 CHECK_OOM(ret); 2759 ret->count = 1; 2760 ret->is_ref2 = is_ref2; 2761 return ret; 2762 } 2763 2764 static void track(const upb_refcounted *r, const void *owner, bool ref2) { 2765 upb_value v; 2766 2767 assert(owner); 2768 if (owner == UPB_UNTRACKED_REF) return; 2769 2770 upb_lock(); 2771 if (upb_inttable_lookupptr(r->refs, owner, &v)) { 2772 trackedref *ref = upb_value_getptr(v); 2773 /* Since we allow multiple ref2's for the same to/from pair without 2774 * allocating separate memory for each one, we lose the fine-grained 2775 * tracking behavior we get with regular refs. 
Since ref2s only happen 2776 * inside upb, we'll accept this limitation until/unless there is a really 2777 * difficult upb-internal bug that can't be figured out without it. */ 2778 assert(ref2); 2779 assert(ref->is_ref2); 2780 ref->count++; 2781 } else { 2782 trackedref *ref = trackedref_new(ref2); 2783 bool ok = upb_inttable_insertptr(r->refs, owner, upb_value_ptr(ref)); 2784 CHECK_OOM(ok); 2785 if (ref2) { 2786 /* We know this cast is safe when it is a ref2, because it's coming from 2787 * another refcounted object. */ 2788 const upb_refcounted *from = owner; 2789 assert(!upb_inttable_lookupptr(from->ref2s, r, NULL)); 2790 ok = upb_inttable_insertptr(from->ref2s, r, upb_value_ptr(NULL)); 2791 CHECK_OOM(ok); 2792 } 2793 } 2794 upb_unlock(); 2795 } 2796 2797 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { 2798 upb_value v; 2799 bool found; 2800 trackedref *ref; 2801 2802 assert(owner); 2803 if (owner == UPB_UNTRACKED_REF) return; 2804 2805 upb_lock(); 2806 found = upb_inttable_lookupptr(r->refs, owner, &v); 2807 /* This assert will fail if an owner attempts to release a ref it didn't have. */ 2808 UPB_ASSERT_VAR(found, found); 2809 ref = upb_value_getptr(v); 2810 assert(ref->is_ref2 == ref2); 2811 if (--ref->count == 0) { 2812 free(ref); 2813 upb_inttable_removeptr(r->refs, owner, NULL); 2814 if (ref2) { 2815 /* We know this cast is safe when it is a ref2, because it's coming from 2816 * another refcounted object. 
*/ 2817 const upb_refcounted *from = owner; 2818 bool removed = upb_inttable_removeptr(from->ref2s, r, NULL); 2819 assert(removed); 2820 } 2821 } 2822 upb_unlock(); 2823 } 2824 2825 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { 2826 upb_value v; 2827 bool found; 2828 trackedref *ref; 2829 2830 upb_lock(); 2831 found = upb_inttable_lookupptr(r->refs, owner, &v); 2832 UPB_ASSERT_VAR(found, found); 2833 ref = upb_value_getptr(v); 2834 assert(ref->is_ref2 == ref2); 2835 upb_unlock(); 2836 } 2837 2838 /* Populates the given UPB_CTYPE_INT32 inttable with counts of ref2's that 2839 * originate from the given owner. */ 2840 static void getref2s(const upb_refcounted *owner, upb_inttable *tab) { 2841 upb_inttable_iter i; 2842 2843 upb_lock(); 2844 upb_inttable_begin(&i, owner->ref2s); 2845 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 2846 upb_value v; 2847 upb_value count; 2848 trackedref *ref; 2849 bool ok; 2850 bool found; 2851 2852 upb_refcounted *to = (upb_refcounted*)upb_inttable_iter_key(&i); 2853 2854 /* To get the count we need to look in the target's table. */ 2855 found = upb_inttable_lookupptr(to->refs, owner, &v); 2856 assert(found); 2857 ref = upb_value_getptr(v); 2858 count = upb_value_int32(ref->count); 2859 2860 ok = upb_inttable_insertptr(tab, to, count); 2861 CHECK_OOM(ok); 2862 } 2863 upb_unlock(); 2864 } 2865 2866 typedef struct { 2867 upb_inttable ref2; 2868 const upb_refcounted *obj; 2869 } check_state; 2870 2871 static void visit_check(const upb_refcounted *obj, const upb_refcounted *subobj, 2872 void *closure) { 2873 check_state *s = closure; 2874 upb_inttable *ref2 = &s->ref2; 2875 upb_value v; 2876 bool removed; 2877 int32_t newcount; 2878 2879 assert(obj == s->obj); 2880 assert(subobj); 2881 removed = upb_inttable_removeptr(ref2, subobj, &v); 2882 /* The following assertion will fail if the visit() function visits a subobj 2883 * that it did not have a ref2 on, or visits the same subobj too many times. 
*/ 2884 assert(removed); 2885 newcount = upb_value_getint32(v) - 1; 2886 if (newcount > 0) { 2887 upb_inttable_insert(ref2, (uintptr_t)subobj, upb_value_int32(newcount)); 2888 } 2889 } 2890 2891 static void visit(const upb_refcounted *r, upb_refcounted_visit *v, 2892 void *closure) { 2893 bool ok; 2894 2895 /* In DEBUG_REFS mode we know what existing ref2 refs there are, so we know 2896 * exactly the set of nodes that visit() should visit. So we verify visit()'s 2897 * correctness here. */ 2898 check_state state; 2899 state.obj = r; 2900 ok = upb_inttable_init(&state.ref2, UPB_CTYPE_INT32); 2901 CHECK_OOM(ok); 2902 getref2s(r, &state.ref2); 2903 2904 /* This should visit any children in the ref2 table. */ 2905 if (r->vtbl->visit) r->vtbl->visit(r, visit_check, &state); 2906 2907 /* This assertion will fail if the visit() function missed any children. */ 2908 assert(upb_inttable_count(&state.ref2) == 0); 2909 upb_inttable_uninit(&state.ref2); 2910 if (r->vtbl->visit) r->vtbl->visit(r, v, closure); 2911 } 2912 2913 static bool trackinit(upb_refcounted *r) { 2914 r->refs = malloc(sizeof(*r->refs)); 2915 r->ref2s = malloc(sizeof(*r->ref2s)); 2916 if (!r->refs || !r->ref2s) goto err1; 2917 2918 if (!upb_inttable_init(r->refs, UPB_CTYPE_PTR)) goto err1; 2919 if (!upb_inttable_init(r->ref2s, UPB_CTYPE_PTR)) goto err2; 2920 return true; 2921 2922 err2: 2923 upb_inttable_uninit(r->refs); 2924 err1: 2925 free(r->refs); 2926 free(r->ref2s); 2927 return false; 2928 } 2929 2930 static void trackfree(const upb_refcounted *r) { 2931 upb_inttable_uninit(r->refs); 2932 upb_inttable_uninit(r->ref2s); 2933 free(r->refs); 2934 free(r->ref2s); 2935 } 2936 2937 #else 2938 2939 static void track(const upb_refcounted *r, const void *owner, bool ref2) { 2940 UPB_UNUSED(r); 2941 UPB_UNUSED(owner); 2942 UPB_UNUSED(ref2); 2943 } 2944 2945 static void untrack(const upb_refcounted *r, const void *owner, bool ref2) { 2946 UPB_UNUSED(r); 2947 UPB_UNUSED(owner); 2948 UPB_UNUSED(ref2); 2949 } 2950 
2951 static void checkref(const upb_refcounted *r, const void *owner, bool ref2) { 2952 UPB_UNUSED(r); 2953 UPB_UNUSED(owner); 2954 UPB_UNUSED(ref2); 2955 } 2956 2957 static bool trackinit(upb_refcounted *r) { 2958 UPB_UNUSED(r); 2959 return true; 2960 } 2961 2962 static void trackfree(const upb_refcounted *r) { 2963 UPB_UNUSED(r); 2964 } 2965 2966 static void visit(const upb_refcounted *r, upb_refcounted_visit *v, 2967 void *closure) { 2968 if (r->vtbl->visit) r->vtbl->visit(r, v, closure); 2969 } 2970 2971 #endif /* UPB_DEBUG_REFS */ 2972 2973 2974 /* freeze() *******************************************************************/ 2975 2976 /* The freeze() operation is by far the most complicated part of this scheme. 2977 * We compute strongly-connected components and then mutate the graph such that 2978 * we preserve the invariants documented at the top of this file. And we must 2979 * handle out-of-memory errors gracefully (without leaving the graph 2980 * inconsistent), which adds to the fun. */ 2981 2982 /* The state used by the freeze operation (shared across many functions). */ 2983 typedef struct { 2984 int depth; 2985 int maxdepth; 2986 uint64_t index; 2987 /* Maps upb_refcounted* -> attributes (color, etc). attr layout varies by 2988 * color. */ 2989 upb_inttable objattr; 2990 upb_inttable stack; /* stack of upb_refcounted* for Tarjan's algorithm. */ 2991 upb_inttable groups; /* array of uint32_t*, malloc'd refcounts for new groups */ 2992 upb_status *status; 2993 jmp_buf err; 2994 } tarjan; 2995 2996 static void release_ref2(const upb_refcounted *obj, 2997 const upb_refcounted *subobj, 2998 void *closure); 2999 3000 /* Node attributes -----------------------------------------------------------*/ 3001 3002 /* After our analysis phase all nodes will be either GRAY or WHITE. */ 3003 3004 typedef enum { 3005 BLACK = 0, /* Object has not been seen. */ 3006 GRAY, /* Object has been found via a refgroup but may not be reachable. 
*/ 3007 GREEN, /* Object is reachable and is currently on the Tarjan stack. */ 3008 WHITE /* Object is reachable and has been assigned a group (SCC). */ 3009 } color_t; 3010 3011 UPB_NORETURN static void err(tarjan *t) { longjmp(t->err, 1); } 3012 UPB_NORETURN static void oom(tarjan *t) { 3013 upb_status_seterrmsg(t->status, "out of memory"); 3014 err(t); 3015 } 3016 3017 static uint64_t trygetattr(const tarjan *t, const upb_refcounted *r) { 3018 upb_value v; 3019 return upb_inttable_lookupptr(&t->objattr, r, &v) ? 3020 upb_value_getuint64(v) : 0; 3021 } 3022 3023 static uint64_t getattr(const tarjan *t, const upb_refcounted *r) { 3024 upb_value v; 3025 bool found = upb_inttable_lookupptr(&t->objattr, r, &v); 3026 UPB_ASSERT_VAR(found, found); 3027 return upb_value_getuint64(v); 3028 } 3029 3030 static void setattr(tarjan *t, const upb_refcounted *r, uint64_t attr) { 3031 upb_inttable_removeptr(&t->objattr, r, NULL); 3032 upb_inttable_insertptr(&t->objattr, r, upb_value_uint64(attr)); 3033 } 3034 3035 static color_t color(tarjan *t, const upb_refcounted *r) { 3036 return trygetattr(t, r) & 0x3; /* Color is always stored in the low 2 bits. */ 3037 } 3038 3039 static void set_gray(tarjan *t, const upb_refcounted *r) { 3040 assert(color(t, r) == BLACK); 3041 setattr(t, r, GRAY); 3042 } 3043 3044 /* Pushes an obj onto the Tarjan stack and sets it to GREEN. */ 3045 static void push(tarjan *t, const upb_refcounted *r) { 3046 assert(color(t, r) == BLACK || color(t, r) == GRAY); 3047 /* This defines the attr layout for the GREEN state. "index" and "lowlink" 3048 * get 31 bits, which is plenty (limit of 2B objects frozen at a time). 
*/ 3049 setattr(t, r, GREEN | (t->index << 2) | (t->index << 33)); 3050 if (++t->index == 0x80000000) { 3051 upb_status_seterrmsg(t->status, "too many objects to freeze"); 3052 err(t); 3053 } 3054 upb_inttable_push(&t->stack, upb_value_ptr((void*)r)); 3055 } 3056 3057 /* Pops an obj from the Tarjan stack and sets it to WHITE, with a ptr to its 3058 * SCC group. */ 3059 static upb_refcounted *pop(tarjan *t) { 3060 upb_refcounted *r = upb_value_getptr(upb_inttable_pop(&t->stack)); 3061 assert(color(t, r) == GREEN); 3062 /* This defines the attr layout for nodes in the WHITE state. 3063 * Top of group stack is [group, NULL]; we point at group. */ 3064 setattr(t, r, WHITE | (upb_inttable_count(&t->groups) - 2) << 8); 3065 return r; 3066 } 3067 3068 static void tarjan_newgroup(tarjan *t) { 3069 uint32_t *group = malloc(sizeof(*group)); 3070 if (!group) oom(t); 3071 /* Push group and empty group leader (we'll fill in leader later). */ 3072 if (!upb_inttable_push(&t->groups, upb_value_ptr(group)) || 3073 !upb_inttable_push(&t->groups, upb_value_ptr(NULL))) { 3074 free(group); 3075 oom(t); 3076 } 3077 *group = 0; 3078 } 3079 3080 static uint32_t idx(tarjan *t, const upb_refcounted *r) { 3081 assert(color(t, r) == GREEN); 3082 return (getattr(t, r) >> 2) & 0x7FFFFFFF; 3083 } 3084 3085 static uint32_t lowlink(tarjan *t, const upb_refcounted *r) { 3086 if (color(t, r) == GREEN) { 3087 return getattr(t, r) >> 33; 3088 } else { 3089 return UINT32_MAX; 3090 } 3091 } 3092 3093 static void set_lowlink(tarjan *t, const upb_refcounted *r, uint32_t lowlink) { 3094 assert(color(t, r) == GREEN); 3095 setattr(t, r, ((uint64_t)lowlink << 33) | (getattr(t, r) & 0x1FFFFFFFF)); 3096 } 3097 3098 static uint32_t *group(tarjan *t, upb_refcounted *r) { 3099 uint64_t groupnum; 3100 upb_value v; 3101 bool found; 3102 3103 assert(color(t, r) == WHITE); 3104 groupnum = getattr(t, r) >> 8; 3105 found = upb_inttable_lookup(&t->groups, groupnum, &v); 3106 UPB_ASSERT_VAR(found, found); 3107 return 
upb_value_getptr(v); 3108 } 3109 3110 /* If the group leader for this object's group has not previously been set, 3111 * the given object is assigned to be its leader. */ 3112 static upb_refcounted *groupleader(tarjan *t, upb_refcounted *r) { 3113 uint64_t leader_slot; 3114 upb_value v; 3115 bool found; 3116 3117 assert(color(t, r) == WHITE); 3118 leader_slot = (getattr(t, r) >> 8) + 1; 3119 found = upb_inttable_lookup(&t->groups, leader_slot, &v); 3120 UPB_ASSERT_VAR(found, found); 3121 if (upb_value_getptr(v)) { 3122 return upb_value_getptr(v); 3123 } else { 3124 upb_inttable_remove(&t->groups, leader_slot, NULL); 3125 upb_inttable_insert(&t->groups, leader_slot, upb_value_ptr(r)); 3126 return r; 3127 } 3128 } 3129 3130 3131 /* Tarjan's algorithm --------------------------------------------------------*/ 3132 3133 /* See: 3134 * http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm */ 3135 static void do_tarjan(const upb_refcounted *obj, tarjan *t); 3136 3137 static void tarjan_visit(const upb_refcounted *obj, 3138 const upb_refcounted *subobj, 3139 void *closure) { 3140 tarjan *t = closure; 3141 if (++t->depth > t->maxdepth) { 3142 upb_status_seterrf(t->status, "graph too deep to freeze (%d)", t->maxdepth); 3143 err(t); 3144 } else if (subobj->is_frozen || color(t, subobj) == WHITE) { 3145 /* Do nothing: we don't want to visit or color already-frozen nodes, 3146 * and WHITE nodes have already been assigned a SCC. */ 3147 } else if (color(t, subobj) < GREEN) { 3148 /* Subdef has not yet been visited; recurse on it. */ 3149 do_tarjan(subobj, t); 3150 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), lowlink(t, subobj))); 3151 } else if (color(t, subobj) == GREEN) { 3152 /* Subdef is in the stack and hence in the current SCC. 
*/ 3153 set_lowlink(t, obj, UPB_MIN(lowlink(t, obj), idx(t, subobj))); 3154 } 3155 --t->depth; 3156 } 3157 3158 static void do_tarjan(const upb_refcounted *obj, tarjan *t) { 3159 if (color(t, obj) == BLACK) { 3160 /* We haven't seen this object's group; mark the whole group GRAY. */ 3161 const upb_refcounted *o = obj; 3162 do { set_gray(t, o); } while ((o = o->next) != obj); 3163 } 3164 3165 push(t, obj); 3166 visit(obj, tarjan_visit, t); 3167 if (lowlink(t, obj) == idx(t, obj)) { 3168 tarjan_newgroup(t); 3169 while (pop(t) != obj) 3170 ; 3171 } 3172 } 3173 3174 3175 /* freeze() ------------------------------------------------------------------*/ 3176 3177 static void crossref(const upb_refcounted *r, const upb_refcounted *subobj, 3178 void *_t) { 3179 tarjan *t = _t; 3180 assert(color(t, r) > BLACK); 3181 if (color(t, subobj) > BLACK && r->group != subobj->group) { 3182 /* Previously this ref was not reflected in subobj->group because they 3183 * were in the same group; now that they are split a ref must be taken. */ 3184 refgroup(subobj->group); 3185 } 3186 } 3187 3188 static bool freeze(upb_refcounted *const*roots, int n, upb_status *s, 3189 int maxdepth) { 3190 volatile bool ret = false; 3191 int i; 3192 upb_inttable_iter iter; 3193 3194 /* We run in two passes so that we can allocate all memory before performing 3195 * any mutation of the input -- this allows us to leave the input unchanged 3196 * in the case of memory allocation failure. 
*/ 3197 tarjan t; 3198 t.index = 0; 3199 t.depth = 0; 3200 t.maxdepth = maxdepth; 3201 t.status = s; 3202 if (!upb_inttable_init(&t.objattr, UPB_CTYPE_UINT64)) goto err1; 3203 if (!upb_inttable_init(&t.stack, UPB_CTYPE_PTR)) goto err2; 3204 if (!upb_inttable_init(&t.groups, UPB_CTYPE_PTR)) goto err3; 3205 if (setjmp(t.err) != 0) goto err4; 3206 3207 3208 for (i = 0; i < n; i++) { 3209 if (color(&t, roots[i]) < GREEN) { 3210 do_tarjan(roots[i], &t); 3211 } 3212 } 3213 3214 /* If we've made it this far, no further errors are possible so it's safe to 3215 * mutate the objects without risk of leaving them in an inconsistent state. */ 3216 ret = true; 3217 3218 /* The transformation that follows requires care. The preconditions are: 3219 * - all objects in attr map are WHITE or GRAY, and are in mutable groups 3220 * (groups of all mutable objs) 3221 * - no ref2(to, from) refs have incremented count(to) if both "to" and 3222 * "from" are in our attr map (this follows from invariants (2) and (3)) */ 3223 3224 /* Pass 1: we remove WHITE objects from their mutable groups, and add them to 3225 * new groups according to the SCC's we computed. These new groups will 3226 * consist of only frozen objects. None will be immediately collectible, 3227 * because WHITE objects are by definition reachable from one of "roots", 3228 * which the caller must own refs on. */ 3229 upb_inttable_begin(&iter, &t.objattr); 3230 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { 3231 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); 3232 /* Since removal from a singly-linked list requires access to the object's 3233 * predecessor, we consider obj->next instead of obj for moving. With the 3234 * while() loop we guarantee that we will visit every node's predecessor. 3235 * Proof: 3236 * 1. every node's predecessor is in our attr map. 3237 * 2. 
though the loop body may change a node's predecessor, it will only 3238 * change it to be the node we are currently operating on, so with a 3239 * while() loop we guarantee ourselves the chance to remove each node. */ 3240 while (color(&t, obj->next) == WHITE && 3241 group(&t, obj->next) != obj->next->group) { 3242 upb_refcounted *leader; 3243 3244 /* Remove from old group. */ 3245 upb_refcounted *move = obj->next; 3246 if (obj == move) { 3247 /* Removing the last object from a group. */ 3248 assert(*obj->group == obj->individual_count); 3249 free(obj->group); 3250 } else { 3251 obj->next = move->next; 3252 /* This may decrease to zero; we'll collect GRAY objects (if any) that 3253 * remain in the group in the third pass. */ 3254 assert(*move->group >= move->individual_count); 3255 *move->group -= move->individual_count; 3256 } 3257 3258 /* Add to new group. */ 3259 leader = groupleader(&t, move); 3260 if (move == leader) { 3261 /* First object added to new group is its leader. */ 3262 move->group = group(&t, move); 3263 move->next = move; 3264 *move->group = move->individual_count; 3265 } else { 3266 /* Group already has at least one object in it. */ 3267 assert(leader->group == group(&t, move)); 3268 move->group = group(&t, move); 3269 move->next = leader->next; 3270 leader->next = move; 3271 *move->group += move->individual_count; 3272 } 3273 3274 move->is_frozen = true; 3275 } 3276 } 3277 3278 /* Pass 2: GRAY and WHITE objects "obj" with ref2(to, obj) references must 3279 * increment count(to) if group(obj) != group(to) (which could now be the 3280 * case if "to" was just frozen). */ 3281 upb_inttable_begin(&iter, &t.objattr); 3282 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { 3283 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); 3284 visit(obj, crossref, &t); 3285 } 3286 3287 /* Pass 3: GRAY objects are collected if their group's refcount dropped to 3288 * zero when we removed its white nodes. 
This can happen if they had only 3289 * been kept alive by virtue of sharing a group with an object that was just 3290 * frozen. 3291 * 3292 * It is important that we do this last, since the GRAY object's free() 3293 * function could call unref2() on just-frozen objects, which will decrement 3294 * refs that were added in pass 2. */ 3295 upb_inttable_begin(&iter, &t.objattr); 3296 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) { 3297 upb_refcounted *obj = (upb_refcounted*)upb_inttable_iter_key(&iter); 3298 if (obj->group == NULL || *obj->group == 0) { 3299 if (obj->group) { 3300 upb_refcounted *o; 3301 3302 /* We eagerly free() the group's count (since we can't easily determine 3303 * the group's remaining size it's the easiest way to ensure it gets 3304 * done). */ 3305 free(obj->group); 3306 3307 /* Visit to release ref2's (done in a separate pass since release_ref2 3308 * depends on o->group being unmodified so it can test merged()). */ 3309 o = obj; 3310 do { visit(o, release_ref2, NULL); } while ((o = o->next) != obj); 3311 3312 /* Mark "group" fields as NULL so we know to free the objects later in 3313 * this loop, but also don't try to delete the group twice. 
*/ 3314 o = obj; 3315 do { o->group = NULL; } while ((o = o->next) != obj); 3316 } 3317 freeobj(obj); 3318 } 3319 } 3320 3321 err4: 3322 if (!ret) { 3323 upb_inttable_begin(&iter, &t.groups); 3324 for(; !upb_inttable_done(&iter); upb_inttable_next(&iter)) 3325 free(upb_value_getptr(upb_inttable_iter_value(&iter))); 3326 } 3327 upb_inttable_uninit(&t.groups); 3328 err3: 3329 upb_inttable_uninit(&t.stack); 3330 err2: 3331 upb_inttable_uninit(&t.objattr); 3332 err1: 3333 return ret; 3334 } 3335 3336 3337 /* Misc internal functions ***************************************************/ 3338 3339 static bool merged(const upb_refcounted *r, const upb_refcounted *r2) { 3340 return r->group == r2->group; 3341 } 3342 3343 static void merge(upb_refcounted *r, upb_refcounted *from) { 3344 upb_refcounted *base; 3345 upb_refcounted *tmp; 3346 3347 if (merged(r, from)) return; 3348 *r->group += *from->group; 3349 free(from->group); 3350 base = from; 3351 3352 /* Set all refcount pointers in the "from" chain to the merged refcount. 3353 * 3354 * TODO(haberman): this linear algorithm can result in an overall O(n^2) bound 3355 * if the user continuously extends a group by one object. Prevent this by 3356 * using one of the techniques in this paper: 3357 * ftp://www.ncedc.org/outgoing/geomorph/dino/orals/p245-tarjan.pdf */ 3358 do { from->group = r->group; } while ((from = from->next) != base); 3359 3360 /* Merge the two circularly linked lists by swapping their next pointers. 
*/ 3361 tmp = r->next; 3362 r->next = base->next; 3363 base->next = tmp; 3364 } 3365 3366 static void unref(const upb_refcounted *r); 3367 3368 static void release_ref2(const upb_refcounted *obj, 3369 const upb_refcounted *subobj, 3370 void *closure) { 3371 UPB_UNUSED(closure); 3372 untrack(subobj, obj, true); 3373 if (!merged(obj, subobj)) { 3374 assert(subobj->is_frozen); 3375 unref(subobj); 3376 } 3377 } 3378 3379 static void unref(const upb_refcounted *r) { 3380 if (unrefgroup(r->group)) { 3381 const upb_refcounted *o; 3382 3383 free(r->group); 3384 3385 /* In two passes, since release_ref2 needs a guarantee that any subobjs 3386 * are alive. */ 3387 o = r; 3388 do { visit(o, release_ref2, NULL); } while((o = o->next) != r); 3389 3390 o = r; 3391 do { 3392 const upb_refcounted *next = o->next; 3393 assert(o->is_frozen || o->individual_count == 0); 3394 freeobj((upb_refcounted*)o); 3395 o = next; 3396 } while(o != r); 3397 } 3398 } 3399 3400 static void freeobj(upb_refcounted *o) { 3401 trackfree(o); 3402 o->vtbl->free((upb_refcounted*)o); 3403 } 3404 3405 3406 /* Public interface ***********************************************************/ 3407 3408 bool upb_refcounted_init(upb_refcounted *r, 3409 const struct upb_refcounted_vtbl *vtbl, 3410 const void *owner) { 3411 #ifndef NDEBUG 3412 /* Endianness check. This is unrelated to upb_refcounted, it's just a 3413 * convenient place to put the check that we can be assured will run for 3414 * basically every program using upb. 
*/ 3415 const int x = 1; 3416 #ifdef UPB_BIG_ENDIAN 3417 assert(*(char*)&x != 1); 3418 #else 3419 assert(*(char*)&x == 1); 3420 #endif 3421 #endif 3422 3423 r->next = r; 3424 r->vtbl = vtbl; 3425 r->individual_count = 0; 3426 r->is_frozen = false; 3427 r->group = malloc(sizeof(*r->group)); 3428 if (!r->group) return false; 3429 *r->group = 0; 3430 if (!trackinit(r)) { 3431 free(r->group); 3432 return false; 3433 } 3434 upb_refcounted_ref(r, owner); 3435 return true; 3436 } 3437 3438 bool upb_refcounted_isfrozen(const upb_refcounted *r) { 3439 return r->is_frozen; 3440 } 3441 3442 void upb_refcounted_ref(const upb_refcounted *r, const void *owner) { 3443 track(r, owner, false); 3444 if (!r->is_frozen) 3445 ((upb_refcounted*)r)->individual_count++; 3446 refgroup(r->group); 3447 } 3448 3449 void upb_refcounted_unref(const upb_refcounted *r, const void *owner) { 3450 untrack(r, owner, false); 3451 if (!r->is_frozen) 3452 ((upb_refcounted*)r)->individual_count--; 3453 unref(r); 3454 } 3455 3456 void upb_refcounted_ref2(const upb_refcounted *r, upb_refcounted *from) { 3457 assert(!from->is_frozen); /* Non-const pointer implies this. */ 3458 track(r, from, true); 3459 if (r->is_frozen) { 3460 refgroup(r->group); 3461 } else { 3462 merge((upb_refcounted*)r, from); 3463 } 3464 } 3465 3466 void upb_refcounted_unref2(const upb_refcounted *r, upb_refcounted *from) { 3467 assert(!from->is_frozen); /* Non-const pointer implies this. 
*/ 3468 untrack(r, from, true); 3469 if (r->is_frozen) { 3470 unref(r); 3471 } else { 3472 assert(merged(r, from)); 3473 } 3474 } 3475 3476 void upb_refcounted_donateref( 3477 const upb_refcounted *r, const void *from, const void *to) { 3478 assert(from != to); 3479 if (to != NULL) 3480 upb_refcounted_ref(r, to); 3481 if (from != NULL) 3482 upb_refcounted_unref(r, from); 3483 } 3484 3485 void upb_refcounted_checkref(const upb_refcounted *r, const void *owner) { 3486 checkref(r, owner, false); 3487 } 3488 3489 bool upb_refcounted_freeze(upb_refcounted *const*roots, int n, upb_status *s, 3490 int maxdepth) { 3491 int i; 3492 for (i = 0; i < n; i++) { 3493 assert(!roots[i]->is_frozen); 3494 } 3495 return freeze(roots, n, s, maxdepth); 3496 } 3497 3498 3499 #include <stdlib.h> 3500 3501 /* Fallback implementation if the shim is not specialized by the JIT. */ 3502 #define SHIM_WRITER(type, ctype) \ 3503 bool upb_shim_set ## type (void *c, const void *hd, ctype val) { \ 3504 uint8_t *m = c; \ 3505 const upb_shim_data *d = hd; \ 3506 if (d->hasbit > 0) \ 3507 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ 3508 *(ctype*)&m[d->offset] = val; \ 3509 return true; \ 3510 } \ 3511 3512 SHIM_WRITER(double, double) 3513 SHIM_WRITER(float, float) 3514 SHIM_WRITER(int32, int32_t) 3515 SHIM_WRITER(int64, int64_t) 3516 SHIM_WRITER(uint32, uint32_t) 3517 SHIM_WRITER(uint64, uint64_t) 3518 SHIM_WRITER(bool, bool) 3519 #undef SHIM_WRITER 3520 3521 bool upb_shim_set(upb_handlers *h, const upb_fielddef *f, size_t offset, 3522 int32_t hasbit) { 3523 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; 3524 bool ok; 3525 3526 upb_shim_data *d = malloc(sizeof(*d)); 3527 if (!d) return false; 3528 d->offset = offset; 3529 d->hasbit = hasbit; 3530 3531 upb_handlerattr_sethandlerdata(&attr, d); 3532 upb_handlerattr_setalwaysok(&attr, true); 3533 upb_handlers_addcleanup(h, d, free); 3534 3535 #define TYPE(u, l) \ 3536 case UPB_TYPE_##u: \ 3537 ok = upb_handlers_set##l(h, f, 
upb_shim_set##l, &attr); break; 3538 3539 ok = false; 3540 3541 switch (upb_fielddef_type(f)) { 3542 TYPE(INT64, int64); 3543 TYPE(INT32, int32); 3544 TYPE(ENUM, int32); 3545 TYPE(UINT64, uint64); 3546 TYPE(UINT32, uint32); 3547 TYPE(DOUBLE, double); 3548 TYPE(FLOAT, float); 3549 TYPE(BOOL, bool); 3550 default: assert(false); break; 3551 } 3552 #undef TYPE 3553 3554 upb_handlerattr_uninit(&attr); 3555 return ok; 3556 } 3557 3558 const upb_shim_data *upb_shim_getdata(const upb_handlers *h, upb_selector_t s, 3559 upb_fieldtype_t *type) { 3560 upb_func *f = upb_handlers_gethandler(h, s); 3561 3562 if ((upb_int64_handlerfunc*)f == upb_shim_setint64) { 3563 *type = UPB_TYPE_INT64; 3564 } else if ((upb_int32_handlerfunc*)f == upb_shim_setint32) { 3565 *type = UPB_TYPE_INT32; 3566 } else if ((upb_uint64_handlerfunc*)f == upb_shim_setuint64) { 3567 *type = UPB_TYPE_UINT64; 3568 } else if ((upb_uint32_handlerfunc*)f == upb_shim_setuint32) { 3569 *type = UPB_TYPE_UINT32; 3570 } else if ((upb_double_handlerfunc*)f == upb_shim_setdouble) { 3571 *type = UPB_TYPE_DOUBLE; 3572 } else if ((upb_float_handlerfunc*)f == upb_shim_setfloat) { 3573 *type = UPB_TYPE_FLOAT; 3574 } else if ((upb_bool_handlerfunc*)f == upb_shim_setbool) { 3575 *type = UPB_TYPE_BOOL; 3576 } else { 3577 return NULL; 3578 } 3579 3580 return (const upb_shim_data*)upb_handlers_gethandlerdata(h, s); 3581 } 3582 3583 3584 #include <stdlib.h> 3585 #include <string.h> 3586 3587 static void upb_symtab_free(upb_refcounted *r) { 3588 upb_symtab *s = (upb_symtab*)r; 3589 upb_strtable_iter i; 3590 upb_strtable_begin(&i, &s->symtab); 3591 for (; !upb_strtable_done(&i); upb_strtable_next(&i)) { 3592 const upb_def *def = upb_value_getptr(upb_strtable_iter_value(&i)); 3593 upb_def_unref(def, s); 3594 } 3595 upb_strtable_uninit(&s->symtab); 3596 free(s); 3597 } 3598 3599 3600 upb_symtab *upb_symtab_new(const void *owner) { 3601 static const struct upb_refcounted_vtbl vtbl = {NULL, &upb_symtab_free}; 3602 upb_symtab *s = 
malloc(sizeof(*s)); 3603 upb_refcounted_init(upb_symtab_upcast_mutable(s), &vtbl, owner); 3604 upb_strtable_init(&s->symtab, UPB_CTYPE_PTR); 3605 return s; 3606 } 3607 3608 void upb_symtab_freeze(upb_symtab *s) { 3609 upb_refcounted *r; 3610 bool ok; 3611 3612 assert(!upb_symtab_isfrozen(s)); 3613 r = upb_symtab_upcast_mutable(s); 3614 /* The symtab does not take ref2's (see refcounted.h) on the defs, because 3615 * defs cannot refer back to the table and therefore cannot create cycles. So 3616 * 0 will suffice for maxdepth here. */ 3617 ok = upb_refcounted_freeze(&r, 1, NULL, 0); 3618 UPB_ASSERT_VAR(ok, ok); 3619 } 3620 3621 const upb_def *upb_symtab_lookup(const upb_symtab *s, const char *sym) { 3622 upb_value v; 3623 upb_def *ret = upb_strtable_lookup(&s->symtab, sym, &v) ? 3624 upb_value_getptr(v) : NULL; 3625 return ret; 3626 } 3627 3628 const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { 3629 upb_value v; 3630 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? 3631 upb_value_getptr(v) : NULL; 3632 return def ? upb_dyncast_msgdef(def) : NULL; 3633 } 3634 3635 const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { 3636 upb_value v; 3637 upb_def *def = upb_strtable_lookup(&s->symtab, sym, &v) ? 3638 upb_value_getptr(v) : NULL; 3639 return def ? upb_dyncast_enumdef(def) : NULL; 3640 } 3641 3642 /* Given a symbol and the base symbol inside which it is defined, find the 3643 * symbol's definition in t. */ 3644 static upb_def *upb_resolvename(const upb_strtable *t, 3645 const char *base, const char *sym) { 3646 if(strlen(sym) == 0) return NULL; 3647 if(sym[0] == '.') { 3648 /* Symbols starting with '.' are absolute, so we do a single lookup. 3649 * Slice to omit the leading '.' */ 3650 upb_value v; 3651 return upb_strtable_lookup(t, sym + 1, &v) ? upb_value_getptr(v) : NULL; 3652 } else { 3653 /* Remove components from base until we find an entry or run out. 
3654 * TODO: This branch is totally broken, but currently not used. */ 3655 (void)base; 3656 assert(false); 3657 return NULL; 3658 } 3659 } 3660 3661 const upb_def *upb_symtab_resolve(const upb_symtab *s, const char *base, 3662 const char *sym) { 3663 upb_def *ret = upb_resolvename(&s->symtab, base, sym); 3664 return ret; 3665 } 3666 3667 /* Starts a depth-first traversal at "def", recursing into any subdefs 3668 * (ie. submessage types). Adds duplicates of existing defs to addtab 3669 * wherever necessary, so that the resulting symtab will be consistent once 3670 * addtab is added. 3671 * 3672 * More specifically, if any def D is found in the DFS that: 3673 * 3674 * 1. can reach a def that is being replaced by something in addtab, AND 3675 * 3676 * 2. is not itself being replaced already (ie. this name doesn't already 3677 * exist in addtab) 3678 * 3679 * ...then a duplicate (new copy) of D will be added to addtab. 3680 * 3681 * Returns true if this happened for any def reachable from "def." 3682 * 3683 * It is slightly tricky to do this correctly in the presence of cycles. If we 3684 * detect that our DFS has hit a cycle, we might not yet know if any SCCs on 3685 * our stack can reach a def in addtab or not. Once we figure this out, that 3686 * answer needs to apply to *all* defs in these SCCs, even if we visited them 3687 * already. So a straight up one-pass cycle-detecting DFS won't work. 3688 * 3689 * To work around this problem, we traverse each SCC (which we already 3690 * computed, since these defs are frozen) as a single node. We first compute 3691 * whether the SCC as a whole can reach any def in addtab, then we dup (or not) 3692 * the entire SCC. This requires breaking the encapsulation of upb_refcounted, 3693 * since that is where we get the data about what SCC we are in. 
*/ 3694 static bool upb_resolve_dfs(const upb_def *def, upb_strtable *addtab, 3695 const void *new_owner, upb_inttable *seen, 3696 upb_status *s) { 3697 upb_value v; 3698 bool need_dup; 3699 const upb_def *base; 3700 const void* memoize_key; 3701 3702 /* Memoize results of this function for efficiency (since we're traversing a 3703 * DAG this is not needed to limit the depth of the search). 3704 * 3705 * We memoize by SCC instead of by individual def. */ 3706 memoize_key = def->base.group; 3707 3708 if (upb_inttable_lookupptr(seen, memoize_key, &v)) 3709 return upb_value_getbool(v); 3710 3711 /* Visit submessages for all messages in the SCC. */ 3712 need_dup = false; 3713 base = def; 3714 do { 3715 upb_value v; 3716 const upb_msgdef *m; 3717 3718 assert(upb_def_isfrozen(def)); 3719 if (def->type == UPB_DEF_FIELD) continue; 3720 if (upb_strtable_lookup(addtab, upb_def_fullname(def), &v)) { 3721 need_dup = true; 3722 } 3723 3724 /* For messages, continue the recursion by visiting all subdefs, but only 3725 * ones in different SCCs. */ 3726 m = upb_dyncast_msgdef(def); 3727 if (m) { 3728 upb_msg_field_iter i; 3729 for(upb_msg_field_begin(&i, m); 3730 !upb_msg_field_done(&i); 3731 upb_msg_field_next(&i)) { 3732 upb_fielddef *f = upb_msg_iter_field(&i); 3733 const upb_def *subdef; 3734 3735 if (!upb_fielddef_hassubdef(f)) continue; 3736 subdef = upb_fielddef_subdef(f); 3737 3738 /* Skip subdefs in this SCC. */ 3739 if (def->base.group == subdef->base.group) continue; 3740 3741 /* |= to avoid short-circuit; we need its side-effects. */ 3742 need_dup |= upb_resolve_dfs(subdef, addtab, new_owner, seen, s); 3743 if (!upb_ok(s)) return false; 3744 } 3745 } 3746 } while ((def = (upb_def*)def->base.next) != base); 3747 3748 if (need_dup) { 3749 /* Dup all defs in this SCC that don't already have entries in addtab. 
*/ 3750 def = base; 3751 do { 3752 const char *name; 3753 3754 if (def->type == UPB_DEF_FIELD) continue; 3755 name = upb_def_fullname(def); 3756 if (!upb_strtable_lookup(addtab, name, NULL)) { 3757 upb_def *newdef = upb_def_dup(def, new_owner); 3758 if (!newdef) goto oom; 3759 newdef->came_from_user = false; 3760 if (!upb_strtable_insert(addtab, name, upb_value_ptr(newdef))) 3761 goto oom; 3762 } 3763 } while ((def = (upb_def*)def->base.next) != base); 3764 } 3765 3766 upb_inttable_insertptr(seen, memoize_key, upb_value_bool(need_dup)); 3767 return need_dup; 3768 3769 oom: 3770 upb_status_seterrmsg(s, "out of memory"); 3771 return false; 3772 } 3773 3774 /* TODO(haberman): we need a lot more testing of error conditions. 3775 * The came_from_user stuff in particular is not tested. */ 3776 bool upb_symtab_add(upb_symtab *s, upb_def *const*defs, int n, void *ref_donor, 3777 upb_status *status) { 3778 int i; 3779 upb_strtable_iter iter; 3780 upb_def **add_defs = NULL; 3781 upb_strtable addtab; 3782 upb_inttable seen; 3783 3784 assert(!upb_symtab_isfrozen(s)); 3785 if (!upb_strtable_init(&addtab, UPB_CTYPE_PTR)) { 3786 upb_status_seterrmsg(status, "out of memory"); 3787 return false; 3788 } 3789 3790 /* Add new defs to our "add" set. 
*/ 3791 for (i = 0; i < n; i++) { 3792 upb_def *def = defs[i]; 3793 const char *fullname; 3794 upb_fielddef *f; 3795 3796 if (upb_def_isfrozen(def)) { 3797 upb_status_seterrmsg(status, "added defs must be mutable"); 3798 goto err; 3799 } 3800 assert(!upb_def_isfrozen(def)); 3801 fullname = upb_def_fullname(def); 3802 if (!fullname) { 3803 upb_status_seterrmsg( 3804 status, "Anonymous defs cannot be added to a symtab"); 3805 goto err; 3806 } 3807 3808 f = upb_dyncast_fielddef_mutable(def); 3809 3810 if (f) { 3811 if (!upb_fielddef_containingtypename(f)) { 3812 upb_status_seterrmsg(status, 3813 "Standalone fielddefs must have a containing type " 3814 "(extendee) name set"); 3815 goto err; 3816 } 3817 } else { 3818 if (upb_strtable_lookup(&addtab, fullname, NULL)) { 3819 upb_status_seterrf(status, "Conflicting defs named '%s'", fullname); 3820 goto err; 3821 } 3822 /* We need this to back out properly, because if there is a failure we 3823 * need to donate the ref back to the caller. */ 3824 def->came_from_user = true; 3825 upb_def_donateref(def, ref_donor, s); 3826 if (!upb_strtable_insert(&addtab, fullname, upb_value_ptr(def))) 3827 goto oom_err; 3828 } 3829 } 3830 3831 /* Add standalone fielddefs (ie. extensions) to the appropriate messages. 3832 * If the appropriate message only exists in the existing symtab, duplicate 3833 * it so we have a mutable copy we can add the fields to. */ 3834 for (i = 0; i < n; i++) { 3835 upb_def *def = defs[i]; 3836 upb_fielddef *f = upb_dyncast_fielddef_mutable(def); 3837 const char *msgname; 3838 upb_value v; 3839 upb_msgdef *m; 3840 3841 if (!f) continue; 3842 msgname = upb_fielddef_containingtypename(f); 3843 /* We validated this earlier in this function. */ 3844 assert(msgname); 3845 3846 /* If the extendee name is absolutely qualified, move past the initial ".". 3847 * TODO(haberman): it is not obvious what it would mean if this was not 3848 * absolutely qualified. 
*/ 3849 if (msgname[0] == '.') { 3850 msgname++; 3851 } 3852 3853 if (upb_strtable_lookup(&addtab, msgname, &v)) { 3854 /* Extendee is in the set of defs the user asked us to add. */ 3855 m = upb_value_getptr(v); 3856 } else { 3857 /* Need to find and dup the extendee from the existing symtab. */ 3858 const upb_msgdef *frozen_m = upb_symtab_lookupmsg(s, msgname); 3859 if (!frozen_m) { 3860 upb_status_seterrf(status, 3861 "Tried to extend message %s that does not exist " 3862 "in this SymbolTable.", 3863 msgname); 3864 goto err; 3865 } 3866 m = upb_msgdef_dup(frozen_m, s); 3867 if (!m) goto oom_err; 3868 if (!upb_strtable_insert(&addtab, msgname, upb_value_ptr(m))) { 3869 upb_msgdef_unref(m, s); 3870 goto oom_err; 3871 } 3872 } 3873 3874 if (!upb_msgdef_addfield(m, f, ref_donor, status)) { 3875 goto err; 3876 } 3877 } 3878 3879 /* Add dups of any existing def that can reach a def with the same name as 3880 * anything in our "add" set. */ 3881 if (!upb_inttable_init(&seen, UPB_CTYPE_BOOL)) goto oom_err; 3882 upb_strtable_begin(&iter, &s->symtab); 3883 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { 3884 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); 3885 upb_resolve_dfs(def, &addtab, s, &seen, status); 3886 if (!upb_ok(status)) goto err; 3887 } 3888 upb_inttable_uninit(&seen); 3889 3890 /* Now using the table, resolve symbolic references for subdefs. */ 3891 upb_strtable_begin(&iter, &addtab); 3892 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { 3893 const char *base; 3894 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); 3895 upb_msgdef *m = upb_dyncast_msgdef_mutable(def); 3896 upb_msg_field_iter j; 3897 3898 if (!m) continue; 3899 /* Type names are resolved relative to the message in which they appear. 
*/ 3900 base = upb_msgdef_fullname(m); 3901 3902 for(upb_msg_field_begin(&j, m); 3903 !upb_msg_field_done(&j); 3904 upb_msg_field_next(&j)) { 3905 upb_fielddef *f = upb_msg_iter_field(&j); 3906 const char *name = upb_fielddef_subdefname(f); 3907 if (name && !upb_fielddef_subdef(f)) { 3908 /* Try the lookup in the current set of to-be-added defs first. If not 3909 * there, try existing defs. */ 3910 upb_def *subdef = upb_resolvename(&addtab, base, name); 3911 if (subdef == NULL) { 3912 subdef = upb_resolvename(&s->symtab, base, name); 3913 } 3914 if (subdef == NULL) { 3915 upb_status_seterrf( 3916 status, "couldn't resolve name '%s' in message '%s'", name, base); 3917 goto err; 3918 } else if (!upb_fielddef_setsubdef(f, subdef, status)) { 3919 goto err; 3920 } 3921 } 3922 } 3923 } 3924 3925 /* We need an array of the defs in addtab, for passing to upb_def_freeze. */ 3926 add_defs = malloc(sizeof(void*) * upb_strtable_count(&addtab)); 3927 if (add_defs == NULL) goto oom_err; 3928 upb_strtable_begin(&iter, &addtab); 3929 for (n = 0; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { 3930 add_defs[n++] = upb_value_getptr(upb_strtable_iter_value(&iter)); 3931 } 3932 3933 if (!upb_def_freeze(add_defs, n, status)) goto err; 3934 3935 /* This must be delayed until all errors have been detected, since error 3936 * recovery code uses this table to cleanup defs. */ 3937 upb_strtable_uninit(&addtab); 3938 3939 /* TODO(haberman) we don't properly handle errors after this point (like 3940 * OOM in upb_strtable_insert() below). 
*/ 3941 for (i = 0; i < n; i++) { 3942 upb_def *def = add_defs[i]; 3943 const char *name = upb_def_fullname(def); 3944 upb_value v; 3945 bool success; 3946 3947 if (upb_strtable_remove(&s->symtab, name, &v)) { 3948 const upb_def *def = upb_value_getptr(v); 3949 upb_def_unref(def, s); 3950 } 3951 success = upb_strtable_insert(&s->symtab, name, upb_value_ptr(def)); 3952 UPB_ASSERT_VAR(success, success == true); 3953 } 3954 free(add_defs); 3955 return true; 3956 3957 oom_err: 3958 upb_status_seterrmsg(status, "out of memory"); 3959 err: { 3960 /* For defs the user passed in, we need to donate the refs back. For defs 3961 * we dup'd, we need to just unref them. */ 3962 upb_strtable_begin(&iter, &addtab); 3963 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { 3964 upb_def *def = upb_value_getptr(upb_strtable_iter_value(&iter)); 3965 bool came_from_user = def->came_from_user; 3966 def->came_from_user = false; 3967 if (came_from_user) { 3968 upb_def_donateref(def, s, ref_donor); 3969 } else { 3970 upb_def_unref(def, s); 3971 } 3972 } 3973 } 3974 upb_strtable_uninit(&addtab); 3975 free(add_defs); 3976 assert(!upb_ok(status)); 3977 return false; 3978 } 3979 3980 /* Iteration. 
*/ 3981 3982 static void advance_to_matching(upb_symtab_iter *iter) { 3983 if (iter->type == UPB_DEF_ANY) 3984 return; 3985 3986 while (!upb_strtable_done(&iter->iter) && 3987 iter->type != upb_symtab_iter_def(iter)->type) { 3988 upb_strtable_next(&iter->iter); 3989 } 3990 } 3991 3992 void upb_symtab_begin(upb_symtab_iter *iter, const upb_symtab *s, 3993 upb_deftype_t type) { 3994 upb_strtable_begin(&iter->iter, &s->symtab); 3995 iter->type = type; 3996 advance_to_matching(iter); 3997 } 3998 3999 void upb_symtab_next(upb_symtab_iter *iter) { 4000 upb_strtable_next(&iter->iter); 4001 advance_to_matching(iter); 4002 } 4003 4004 bool upb_symtab_done(const upb_symtab_iter *iter) { 4005 return upb_strtable_done(&iter->iter); 4006 } 4007 4008 const upb_def *upb_symtab_iter_def(const upb_symtab_iter *iter) { 4009 return upb_value_getptr(upb_strtable_iter_value(&iter->iter)); 4010 } 4011 /* 4012 ** upb_table Implementation 4013 ** 4014 ** Implementation is heavily inspired by Lua's ltable.c. 4015 */ 4016 4017 4018 #include <stdlib.h> 4019 #include <string.h> 4020 4021 #define UPB_MAXARRSIZE 16 /* 64k. */ 4022 4023 /* From Chromium. */ 4024 #define ARRAY_SIZE(x) \ 4025 ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) 4026 4027 static const double MAX_LOAD = 0.85; 4028 4029 /* The minimum utilization of the array part of a mixed hash/array table. This 4030 * is a speed/memory-usage tradeoff (though it's not straightforward because of 4031 * cache effects). The lower this is, the more memory we'll use. */ 4032 static const double MIN_DENSITY = 0.1; 4033 4034 bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } 4035 4036 int log2ceil(uint64_t v) { 4037 int ret = 0; 4038 bool pow2 = is_pow2(v); 4039 while (v >>= 1) ret++; 4040 ret = pow2 ? ret : ret + 1; /* Ceiling. 
*/ 4041 return UPB_MIN(UPB_MAXARRSIZE, ret); 4042 } 4043 4044 char *upb_strdup(const char *s) { 4045 return upb_strdup2(s, strlen(s)); 4046 } 4047 4048 char *upb_strdup2(const char *s, size_t len) { 4049 size_t n; 4050 char *p; 4051 4052 /* Prevent overflow errors. */ 4053 if (len == SIZE_MAX) return NULL; 4054 /* Always null-terminate, even if binary data; but don't rely on the input to 4055 * have a null-terminating byte since it may be a raw binary buffer. */ 4056 n = len + 1; 4057 p = malloc(n); 4058 if (p) { 4059 memcpy(p, s, len); 4060 p[len] = 0; 4061 } 4062 return p; 4063 } 4064 4065 /* A type to represent the lookup key of either a strtable or an inttable. */ 4066 typedef union { 4067 uintptr_t num; 4068 struct { 4069 const char *str; 4070 size_t len; 4071 } str; 4072 } lookupkey_t; 4073 4074 static lookupkey_t strkey2(const char *str, size_t len) { 4075 lookupkey_t k; 4076 k.str.str = str; 4077 k.str.len = len; 4078 return k; 4079 } 4080 4081 static lookupkey_t intkey(uintptr_t key) { 4082 lookupkey_t k; 4083 k.num = key; 4084 return k; 4085 } 4086 4087 typedef uint32_t hashfunc_t(upb_tabkey key); 4088 typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); 4089 4090 /* Base table (shared code) ***************************************************/ 4091 4092 /* For when we need to cast away const. */ 4093 static upb_tabent *mutable_entries(upb_table *t) { 4094 return (upb_tabent*)t->entries; 4095 } 4096 4097 static bool isfull(upb_table *t) { 4098 return (double)(t->count + 1) / upb_table_size(t) > MAX_LOAD; 4099 } 4100 4101 static bool init(upb_table *t, upb_ctype_t ctype, uint8_t size_lg2) { 4102 size_t bytes; 4103 4104 t->count = 0; 4105 t->ctype = ctype; 4106 t->size_lg2 = size_lg2; 4107 t->mask = upb_table_size(t) ? 
upb_table_size(t) - 1 : 0; 4108 bytes = upb_table_size(t) * sizeof(upb_tabent); 4109 if (bytes > 0) { 4110 t->entries = malloc(bytes); 4111 if (!t->entries) return false; 4112 memset(mutable_entries(t), 0, bytes); 4113 } else { 4114 t->entries = NULL; 4115 } 4116 return true; 4117 } 4118 4119 static void uninit(upb_table *t) { free(mutable_entries(t)); } 4120 4121 static upb_tabent *emptyent(upb_table *t) { 4122 upb_tabent *e = mutable_entries(t) + upb_table_size(t); 4123 while (1) { if (upb_tabent_isempty(--e)) return e; assert(e > t->entries); } 4124 } 4125 4126 static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { 4127 return (upb_tabent*)upb_getentry(t, hash); 4128 } 4129 4130 static const upb_tabent *findentry(const upb_table *t, lookupkey_t key, 4131 uint32_t hash, eqlfunc_t *eql) { 4132 const upb_tabent *e; 4133 4134 if (t->size_lg2 == 0) return NULL; 4135 e = upb_getentry(t, hash); 4136 if (upb_tabent_isempty(e)) return NULL; 4137 while (1) { 4138 if (eql(e->key, key)) return e; 4139 if ((e = e->next) == NULL) return NULL; 4140 } 4141 } 4142 4143 static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key, 4144 uint32_t hash, eqlfunc_t *eql) { 4145 return (upb_tabent*)findentry(t, key, hash, eql); 4146 } 4147 4148 static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v, 4149 uint32_t hash, eqlfunc_t *eql) { 4150 const upb_tabent *e = findentry(t, key, hash, eql); 4151 if (e) { 4152 if (v) { 4153 _upb_value_setval(v, e->val.val, t->ctype); 4154 } 4155 return true; 4156 } else { 4157 return false; 4158 } 4159 } 4160 4161 /* The given key must not already exist in the table. 
*/ 4162 static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey, 4163 upb_value val, uint32_t hash, 4164 hashfunc_t *hashfunc, eqlfunc_t *eql) { 4165 upb_tabent *mainpos_e; 4166 upb_tabent *our_e; 4167 4168 UPB_UNUSED(eql); 4169 UPB_UNUSED(key); 4170 assert(findentry(t, key, hash, eql) == NULL); 4171 assert(val.ctype == t->ctype); 4172 4173 t->count++; 4174 mainpos_e = getentry_mutable(t, hash); 4175 our_e = mainpos_e; 4176 4177 if (upb_tabent_isempty(mainpos_e)) { 4178 /* Our main position is empty; use it. */ 4179 our_e->next = NULL; 4180 } else { 4181 /* Collision. */ 4182 upb_tabent *new_e = emptyent(t); 4183 /* Head of collider's chain. */ 4184 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); 4185 if (chain == mainpos_e) { 4186 /* Existing ent is in its main posisiton (it has the same hash as us, and 4187 * is the head of our chain). Insert to new ent and append to this chain. */ 4188 new_e->next = mainpos_e->next; 4189 mainpos_e->next = new_e; 4190 our_e = new_e; 4191 } else { 4192 /* Existing ent is not in its main position (it is a node in some other 4193 * chain). This implies that no existing ent in the table has our hash. 4194 * Evict it (updating its chain) and use its ent for head of our chain. */ 4195 *new_e = *mainpos_e; /* copies next. */ 4196 while (chain->next != mainpos_e) { 4197 chain = (upb_tabent*)chain->next; 4198 assert(chain); 4199 } 4200 chain->next = new_e; 4201 our_e = mainpos_e; 4202 our_e->next = NULL; 4203 } 4204 } 4205 our_e->key = tabkey; 4206 our_e->val.val = val.val; 4207 assert(findentry(t, key, hash, eql) == our_e); 4208 } 4209 4210 static bool rm(upb_table *t, lookupkey_t key, upb_value *val, 4211 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) { 4212 upb_tabent *chain = getentry_mutable(t, hash); 4213 if (upb_tabent_isempty(chain)) return false; 4214 if (eql(chain->key, key)) { 4215 /* Element to remove is at the head of its chain. 
*/ 4216 t->count--; 4217 if (val) { 4218 _upb_value_setval(val, chain->val.val, t->ctype); 4219 } 4220 if (chain->next) { 4221 upb_tabent *move = (upb_tabent*)chain->next; 4222 *chain = *move; 4223 if (removed) *removed = move->key; 4224 move->key = 0; /* Make the slot empty. */ 4225 } else { 4226 if (removed) *removed = chain->key; 4227 chain->key = 0; /* Make the slot empty. */ 4228 } 4229 return true; 4230 } else { 4231 /* Element to remove is either in a non-head position or not in the 4232 * table. */ 4233 while (chain->next && !eql(chain->next->key, key)) 4234 chain = (upb_tabent*)chain->next; 4235 if (chain->next) { 4236 /* Found element to remove. */ 4237 upb_tabent *rm; 4238 4239 if (val) { 4240 _upb_value_setval(val, chain->next->val.val, t->ctype); 4241 } 4242 rm = (upb_tabent*)chain->next; 4243 if (removed) *removed = rm->key; 4244 rm->key = 0; 4245 chain->next = rm->next; 4246 t->count--; 4247 return true; 4248 } else { 4249 return false; 4250 } 4251 } 4252 } 4253 4254 static size_t next(const upb_table *t, size_t i) { 4255 do { 4256 if (++i >= upb_table_size(t)) 4257 return SIZE_MAX; 4258 } while(upb_tabent_isempty(&t->entries[i])); 4259 4260 return i; 4261 } 4262 4263 static size_t begin(const upb_table *t) { 4264 return next(t, -1); 4265 } 4266 4267 4268 /* upb_strtable ***************************************************************/ 4269 4270 /* A simple "subclass" of upb_table that only adds a hash function for strings. 
*/ 4271 4272 static upb_tabkey strcopy(lookupkey_t k2) { 4273 char *str = malloc(k2.str.len + sizeof(uint32_t) + 1); 4274 if (str == NULL) return 0; 4275 memcpy(str, &k2.str.len, sizeof(uint32_t)); 4276 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len + 1); 4277 return (uintptr_t)str; 4278 } 4279 4280 static uint32_t strhash(upb_tabkey key) { 4281 uint32_t len; 4282 char *str = upb_tabstr(key, &len); 4283 return MurmurHash2(str, len, 0); 4284 } 4285 4286 static bool streql(upb_tabkey k1, lookupkey_t k2) { 4287 uint32_t len; 4288 char *str = upb_tabstr(k1, &len); 4289 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0; 4290 } 4291 4292 bool upb_strtable_init(upb_strtable *t, upb_ctype_t ctype) { 4293 return init(&t->t, ctype, 2); 4294 } 4295 4296 void upb_strtable_uninit(upb_strtable *t) { 4297 size_t i; 4298 for (i = 0; i < upb_table_size(&t->t); i++) 4299 free((void*)t->t.entries[i].key); 4300 uninit(&t->t); 4301 } 4302 4303 bool upb_strtable_resize(upb_strtable *t, size_t size_lg2) { 4304 upb_strtable new_table; 4305 upb_strtable_iter i; 4306 4307 if (!init(&new_table.t, t->t.ctype, size_lg2)) 4308 return false; 4309 upb_strtable_begin(&i, t); 4310 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { 4311 upb_strtable_insert2( 4312 &new_table, 4313 upb_strtable_iter_key(&i), 4314 upb_strtable_iter_keylength(&i), 4315 upb_strtable_iter_value(&i)); 4316 } 4317 upb_strtable_uninit(t); 4318 *t = new_table; 4319 return true; 4320 } 4321 4322 bool upb_strtable_insert2(upb_strtable *t, const char *k, size_t len, 4323 upb_value v) { 4324 lookupkey_t key; 4325 upb_tabkey tabkey; 4326 uint32_t hash; 4327 4328 if (isfull(&t->t)) { 4329 /* Need to resize. New table of double the size, add old elements to it. 
*/ 4330 if (!upb_strtable_resize(t, t->t.size_lg2 + 1)) { 4331 return false; 4332 } 4333 } 4334 4335 key = strkey2(k, len); 4336 tabkey = strcopy(key); 4337 if (tabkey == 0) return false; 4338 4339 hash = MurmurHash2(key.str.str, key.str.len, 0); 4340 insert(&t->t, key, tabkey, v, hash, &strhash, &streql); 4341 return true; 4342 } 4343 4344 bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, 4345 upb_value *v) { 4346 uint32_t hash = MurmurHash2(key, len, 0); 4347 return lookup(&t->t, strkey2(key, len), v, hash, &streql); 4348 } 4349 4350 bool upb_strtable_remove2(upb_strtable *t, const char *key, size_t len, 4351 upb_value *val) { 4352 uint32_t hash = MurmurHash2(key, strlen(key), 0); 4353 upb_tabkey tabkey; 4354 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { 4355 free((void*)tabkey); 4356 return true; 4357 } else { 4358 return false; 4359 } 4360 } 4361 4362 /* Iteration */ 4363 4364 static const upb_tabent *str_tabent(const upb_strtable_iter *i) { 4365 return &i->t->t.entries[i->index]; 4366 } 4367 4368 void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { 4369 i->t = t; 4370 i->index = begin(&t->t); 4371 } 4372 4373 void upb_strtable_next(upb_strtable_iter *i) { 4374 i->index = next(&i->t->t, i->index); 4375 } 4376 4377 bool upb_strtable_done(const upb_strtable_iter *i) { 4378 return i->index >= upb_table_size(&i->t->t) || 4379 upb_tabent_isempty(str_tabent(i)); 4380 } 4381 4382 const char *upb_strtable_iter_key(upb_strtable_iter *i) { 4383 assert(!upb_strtable_done(i)); 4384 return upb_tabstr(str_tabent(i)->key, NULL); 4385 } 4386 4387 size_t upb_strtable_iter_keylength(upb_strtable_iter *i) { 4388 uint32_t len; 4389 assert(!upb_strtable_done(i)); 4390 upb_tabstr(str_tabent(i)->key, &len); 4391 return len; 4392 } 4393 4394 upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { 4395 assert(!upb_strtable_done(i)); 4396 return _upb_value_val(str_tabent(i)->val.val, i->t->t.ctype); 4397 } 4398 
4399 void upb_strtable_iter_setdone(upb_strtable_iter *i) { 4400 i->index = SIZE_MAX; 4401 } 4402 4403 bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, 4404 const upb_strtable_iter *i2) { 4405 if (upb_strtable_done(i1) && upb_strtable_done(i2)) 4406 return true; 4407 return i1->t == i2->t && i1->index == i2->index; 4408 } 4409 4410 4411 /* upb_inttable ***************************************************************/ 4412 4413 /* For inttables we use a hybrid structure where small keys are kept in an 4414 * array and large keys are put in the hash table. */ 4415 4416 static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); } 4417 4418 static bool inteql(upb_tabkey k1, lookupkey_t k2) { 4419 return k1 == k2.num; 4420 } 4421 4422 static upb_tabval *mutable_array(upb_inttable *t) { 4423 return (upb_tabval*)t->array; 4424 } 4425 4426 static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) { 4427 if (key < t->array_size) { 4428 return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL; 4429 } else { 4430 upb_tabent *e = 4431 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql); 4432 return e ? &e->val : NULL; 4433 } 4434 } 4435 4436 static const upb_tabval *inttable_val_const(const upb_inttable *t, 4437 uintptr_t key) { 4438 return inttable_val((upb_inttable*)t, key); 4439 } 4440 4441 size_t upb_inttable_count(const upb_inttable *t) { 4442 return t->t.count + t->array_count; 4443 } 4444 4445 static void check(upb_inttable *t) { 4446 UPB_UNUSED(t); 4447 #if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) 4448 { 4449 /* This check is very expensive (makes inserts/deletes O(N)). 
*/ 4450 size_t count = 0; 4451 upb_inttable_iter i; 4452 upb_inttable_begin(&i, t); 4453 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { 4454 assert(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL)); 4455 } 4456 assert(count == upb_inttable_count(t)); 4457 } 4458 #endif 4459 } 4460 4461 bool upb_inttable_sizedinit(upb_inttable *t, upb_ctype_t ctype, 4462 size_t asize, int hsize_lg2) { 4463 size_t array_bytes; 4464 4465 if (!init(&t->t, ctype, hsize_lg2)) return false; 4466 /* Always make the array part at least 1 long, so that we know key 0 4467 * won't be in the hash part, which simplifies things. */ 4468 t->array_size = UPB_MAX(1, asize); 4469 t->array_count = 0; 4470 array_bytes = t->array_size * sizeof(upb_value); 4471 t->array = malloc(array_bytes); 4472 if (!t->array) { 4473 uninit(&t->t); 4474 return false; 4475 } 4476 memset(mutable_array(t), 0xff, array_bytes); 4477 check(t); 4478 return true; 4479 } 4480 4481 bool upb_inttable_init(upb_inttable *t, upb_ctype_t ctype) { 4482 return upb_inttable_sizedinit(t, ctype, 0, 4); 4483 } 4484 4485 void upb_inttable_uninit(upb_inttable *t) { 4486 uninit(&t->t); 4487 free(mutable_array(t)); 4488 } 4489 4490 bool upb_inttable_insert(upb_inttable *t, uintptr_t key, upb_value val) { 4491 /* XXX: Table can't store value (uint64_t)-1. Need to somehow statically 4492 * guarantee that this is not necessary, or fix the limitation. */ 4493 upb_tabval tabval; 4494 tabval.val = val.val; 4495 UPB_UNUSED(tabval); 4496 assert(upb_arrhas(tabval)); 4497 4498 if (key < t->array_size) { 4499 assert(!upb_arrhas(t->array[key])); 4500 t->array_count++; 4501 mutable_array(t)[key].val = val.val; 4502 } else { 4503 if (isfull(&t->t)) { 4504 /* Need to resize the hash part, but we re-use the array part. 
*/ 4505 size_t i; 4506 upb_table new_table; 4507 if (!init(&new_table, t->t.ctype, t->t.size_lg2 + 1)) 4508 return false; 4509 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) { 4510 const upb_tabent *e = &t->t.entries[i]; 4511 uint32_t hash; 4512 upb_value v; 4513 4514 _upb_value_setval(&v, e->val.val, t->t.ctype); 4515 hash = upb_inthash(e->key); 4516 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql); 4517 } 4518 4519 assert(t->t.count == new_table.count); 4520 4521 uninit(&t->t); 4522 t->t = new_table; 4523 } 4524 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); 4525 } 4526 check(t); 4527 return true; 4528 } 4529 4530 bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { 4531 const upb_tabval *table_v = inttable_val_const(t, key); 4532 if (!table_v) return false; 4533 if (v) _upb_value_setval(v, table_v->val, t->t.ctype); 4534 return true; 4535 } 4536 4537 bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) { 4538 upb_tabval *table_v = inttable_val(t, key); 4539 if (!table_v) return false; 4540 table_v->val = val.val; 4541 return true; 4542 } 4543 4544 bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { 4545 bool success; 4546 if (key < t->array_size) { 4547 if (upb_arrhas(t->array[key])) { 4548 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT; 4549 t->array_count--; 4550 if (val) { 4551 _upb_value_setval(val, t->array[key].val, t->t.ctype); 4552 } 4553 mutable_array(t)[key] = empty; 4554 success = true; 4555 } else { 4556 success = false; 4557 } 4558 } else { 4559 upb_tabkey removed; 4560 uint32_t hash = upb_inthash(key); 4561 success = rm(&t->t, intkey(key), val, &removed, hash, &inteql); 4562 } 4563 check(t); 4564 return success; 4565 } 4566 4567 bool upb_inttable_push(upb_inttable *t, upb_value val) { 4568 return upb_inttable_insert(t, upb_inttable_count(t), val); 4569 } 4570 4571 upb_value upb_inttable_pop(upb_inttable *t) { 
4572 upb_value val; 4573 bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val); 4574 UPB_ASSERT_VAR(ok, ok); 4575 return val; 4576 } 4577 4578 bool upb_inttable_insertptr(upb_inttable *t, const void *key, upb_value val) { 4579 return upb_inttable_insert(t, (uintptr_t)key, val); 4580 } 4581 4582 bool upb_inttable_lookupptr(const upb_inttable *t, const void *key, 4583 upb_value *v) { 4584 return upb_inttable_lookup(t, (uintptr_t)key, v); 4585 } 4586 4587 bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) { 4588 return upb_inttable_remove(t, (uintptr_t)key, val); 4589 } 4590 4591 void upb_inttable_compact(upb_inttable *t) { 4592 /* Create a power-of-two histogram of the table keys. */ 4593 int counts[UPB_MAXARRSIZE + 1] = {0}; 4594 uintptr_t max_key = 0; 4595 upb_inttable_iter i; 4596 size_t arr_size; 4597 int arr_count; 4598 upb_inttable new_t; 4599 4600 upb_inttable_begin(&i, t); 4601 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { 4602 uintptr_t key = upb_inttable_iter_key(&i); 4603 if (key > max_key) { 4604 max_key = key; 4605 } 4606 counts[log2ceil(key)]++; 4607 } 4608 4609 arr_size = 1; 4610 arr_count = upb_inttable_count(t); 4611 4612 if (upb_inttable_count(t) >= max_key * MIN_DENSITY) { 4613 /* We can put 100% of the entries in the array part. */ 4614 arr_size = max_key + 1; 4615 } else { 4616 /* Find the largest power of two that satisfies the MIN_DENSITY 4617 * definition. */ 4618 int size_lg2; 4619 for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 1; size_lg2--) { 4620 arr_size = 1 << size_lg2; 4621 arr_count -= counts[size_lg2]; 4622 if (arr_count >= arr_size * MIN_DENSITY) { 4623 break; 4624 } 4625 } 4626 } 4627 4628 /* Array part must always be at least 1 entry large to catch lookups of key 4629 * 0. Key 0 must always be in the array part because "0" in the hash part 4630 * denotes an empty entry. 
*/ 4631 arr_size = UPB_MAX(arr_size, 1); 4632 4633 { 4634 /* Insert all elements into new, perfectly-sized table. */ 4635 int hash_count = upb_inttable_count(t) - arr_count; 4636 int hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0; 4637 int hashsize_lg2 = log2ceil(hash_size); 4638 4639 assert(hash_count >= 0); 4640 upb_inttable_sizedinit(&new_t, t->t.ctype, arr_size, hashsize_lg2); 4641 upb_inttable_begin(&i, t); 4642 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { 4643 uintptr_t k = upb_inttable_iter_key(&i); 4644 upb_inttable_insert(&new_t, k, upb_inttable_iter_value(&i)); 4645 } 4646 assert(new_t.array_size == arr_size); 4647 assert(new_t.t.size_lg2 == hashsize_lg2); 4648 } 4649 upb_inttable_uninit(t); 4650 *t = new_t; 4651 } 4652 4653 /* Iteration. */ 4654 4655 static const upb_tabent *int_tabent(const upb_inttable_iter *i) { 4656 assert(!i->array_part); 4657 return &i->t->t.entries[i->index]; 4658 } 4659 4660 static upb_tabval int_arrent(const upb_inttable_iter *i) { 4661 assert(i->array_part); 4662 return i->t->array[i->index]; 4663 } 4664 4665 void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { 4666 i->t = t; 4667 i->index = -1; 4668 i->array_part = true; 4669 upb_inttable_next(i); 4670 } 4671 4672 void upb_inttable_next(upb_inttable_iter *iter) { 4673 const upb_inttable *t = iter->t; 4674 if (iter->array_part) { 4675 while (++iter->index < t->array_size) { 4676 if (upb_arrhas(int_arrent(iter))) { 4677 return; 4678 } 4679 } 4680 iter->array_part = false; 4681 iter->index = begin(&t->t); 4682 } else { 4683 iter->index = next(&t->t, iter->index); 4684 } 4685 } 4686 4687 bool upb_inttable_done(const upb_inttable_iter *i) { 4688 if (i->array_part) { 4689 return i->index >= i->t->array_size || 4690 !upb_arrhas(int_arrent(i)); 4691 } else { 4692 return i->index >= upb_table_size(&i->t->t) || 4693 upb_tabent_isempty(int_tabent(i)); 4694 } 4695 } 4696 4697 uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) { 4698 
assert(!upb_inttable_done(i)); 4699 return i->array_part ? i->index : int_tabent(i)->key; 4700 } 4701 4702 upb_value upb_inttable_iter_value(const upb_inttable_iter *i) { 4703 assert(!upb_inttable_done(i)); 4704 return _upb_value_val( 4705 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val, 4706 i->t->t.ctype); 4707 } 4708 4709 void upb_inttable_iter_setdone(upb_inttable_iter *i) { 4710 i->index = SIZE_MAX; 4711 i->array_part = false; 4712 } 4713 4714 bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, 4715 const upb_inttable_iter *i2) { 4716 if (upb_inttable_done(i1) && upb_inttable_done(i2)) 4717 return true; 4718 return i1->t == i2->t && i1->index == i2->index && 4719 i1->array_part == i2->array_part; 4720 } 4721 4722 #ifdef UPB_UNALIGNED_READS_OK 4723 /* ----------------------------------------------------------------------------- 4724 * MurmurHash2, by Austin Appleby (released as public domain). 4725 * Reformatted and C99-ified by Joshua Haberman. 4726 * Note - This code makes a few assumptions about how your machine behaves - 4727 * 1. We can read a 4-byte value from any address without crashing 4728 * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t 4729 * And it has a few limitations - 4730 * 1. It will not work incrementally. 4731 * 2. It will not produce the same results on little-endian and big-endian 4732 * machines. */ 4733 uint32_t MurmurHash2(const void *key, size_t len, uint32_t seed) { 4734 /* 'm' and 'r' are mixing constants generated offline. 4735 * They're not really 'magic', they just happen to work well. 
*/ 4736 const uint32_t m = 0x5bd1e995; 4737 const int32_t r = 24; 4738 4739 /* Initialize the hash to a 'random' value */ 4740 uint32_t h = seed ^ len; 4741 4742 /* Mix 4 bytes at a time into the hash */ 4743 const uint8_t * data = (const uint8_t *)key; 4744 while(len >= 4) { 4745 uint32_t k = *(uint32_t *)data; 4746 4747 k *= m; 4748 k ^= k >> r; 4749 k *= m; 4750 4751 h *= m; 4752 h ^= k; 4753 4754 data += 4; 4755 len -= 4; 4756 } 4757 4758 /* Handle the last few bytes of the input array */ 4759 switch(len) { 4760 case 3: h ^= data[2] << 16; 4761 case 2: h ^= data[1] << 8; 4762 case 1: h ^= data[0]; h *= m; 4763 }; 4764 4765 /* Do a few final mixes of the hash to ensure the last few 4766 * bytes are well-incorporated. */ 4767 h ^= h >> 13; 4768 h *= m; 4769 h ^= h >> 15; 4770 4771 return h; 4772 } 4773 4774 #else /* !UPB_UNALIGNED_READS_OK */ 4775 4776 /* ----------------------------------------------------------------------------- 4777 * MurmurHashAligned2, by Austin Appleby 4778 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer 4779 * on certain platforms. 
4780 * Performance will be lower than MurmurHash2 */ 4781 4782 #define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } 4783 4784 uint32_t MurmurHash2(const void * key, size_t len, uint32_t seed) { 4785 const uint32_t m = 0x5bd1e995; 4786 const int32_t r = 24; 4787 const uint8_t * data = (const uint8_t *)key; 4788 uint32_t h = seed ^ len; 4789 uint8_t align = (uintptr_t)data & 3; 4790 4791 if(align && (len >= 4)) { 4792 /* Pre-load the temp registers */ 4793 uint32_t t = 0, d = 0; 4794 int32_t sl; 4795 int32_t sr; 4796 4797 switch(align) { 4798 case 1: t |= data[2] << 16; 4799 case 2: t |= data[1] << 8; 4800 case 3: t |= data[0]; 4801 } 4802 4803 t <<= (8 * align); 4804 4805 data += 4-align; 4806 len -= 4-align; 4807 4808 sl = 8 * (4-align); 4809 sr = 8 * align; 4810 4811 /* Mix */ 4812 4813 while(len >= 4) { 4814 uint32_t k; 4815 4816 d = *(uint32_t *)data; 4817 t = (t >> sr) | (d << sl); 4818 4819 k = t; 4820 4821 MIX(h,k,m); 4822 4823 t = d; 4824 4825 data += 4; 4826 len -= 4; 4827 } 4828 4829 /* Handle leftover data in temp registers */ 4830 4831 d = 0; 4832 4833 if(len >= align) { 4834 uint32_t k; 4835 4836 switch(align) { 4837 case 3: d |= data[2] << 16; 4838 case 2: d |= data[1] << 8; 4839 case 1: d |= data[0]; 4840 } 4841 4842 k = (t >> sr) | (d << sl); 4843 MIX(h,k,m); 4844 4845 data += align; 4846 len -= align; 4847 4848 /* ---------- 4849 * Handle tail bytes */ 4850 4851 switch(len) { 4852 case 3: h ^= data[2] << 16; 4853 case 2: h ^= data[1] << 8; 4854 case 1: h ^= data[0]; h *= m; 4855 }; 4856 } else { 4857 switch(len) { 4858 case 3: d |= data[2] << 16; 4859 case 2: d |= data[1] << 8; 4860 case 1: d |= data[0]; 4861 case 0: h ^= (t >> sr) | (d << sl); h *= m; 4862 } 4863 } 4864 4865 h ^= h >> 13; 4866 h *= m; 4867 h ^= h >> 15; 4868 4869 return h; 4870 } else { 4871 while(len >= 4) { 4872 uint32_t k = *(uint32_t *)data; 4873 4874 MIX(h,k,m); 4875 4876 data += 4; 4877 len -= 4; 4878 } 4879 4880 /* ---------- 4881 * Handle tail bytes */ 4882 
4883 switch(len) { 4884 case 3: h ^= data[2] << 16; 4885 case 2: h ^= data[1] << 8; 4886 case 1: h ^= data[0]; h *= m; 4887 }; 4888 4889 h ^= h >> 13; 4890 h *= m; 4891 h ^= h >> 15; 4892 4893 return h; 4894 } 4895 } 4896 #undef MIX 4897 4898 #endif /* UPB_UNALIGNED_READS_OK */ 4899 4900 #include <errno.h> 4901 #include <stdarg.h> 4902 #include <stddef.h> 4903 #include <stdint.h> 4904 #include <stdio.h> 4905 #include <stdlib.h> 4906 #include <string.h> 4907 4908 bool upb_dumptostderr(void *closure, const upb_status* status) { 4909 UPB_UNUSED(closure); 4910 fprintf(stderr, "%s\n", upb_status_errmsg(status)); 4911 return false; 4912 } 4913 4914 /* Guarantee null-termination and provide ellipsis truncation. 4915 * It may be tempting to "optimize" this by initializing these final 4916 * four bytes up-front and then being careful never to overwrite them, 4917 * this is safer and simpler. */ 4918 static void nullz(upb_status *status) { 4919 const char *ellipsis = "..."; 4920 size_t len = strlen(ellipsis); 4921 assert(sizeof(status->msg) > len); 4922 memcpy(status->msg + sizeof(status->msg) - len, ellipsis, len); 4923 } 4924 4925 void upb_status_clear(upb_status *status) { 4926 if (!status) return; 4927 status->ok_ = true; 4928 status->code_ = 0; 4929 status->msg[0] = '\0'; 4930 } 4931 4932 bool upb_ok(const upb_status *status) { return status->ok_; } 4933 4934 upb_errorspace *upb_status_errspace(const upb_status *status) { 4935 return status->error_space_; 4936 } 4937 4938 int upb_status_errcode(const upb_status *status) { return status->code_; } 4939 4940 const char *upb_status_errmsg(const upb_status *status) { return status->msg; } 4941 4942 void upb_status_seterrmsg(upb_status *status, const char *msg) { 4943 if (!status) return; 4944 status->ok_ = false; 4945 strncpy(status->msg, msg, sizeof(status->msg)); 4946 nullz(status); 4947 } 4948 4949 void upb_status_seterrf(upb_status *status, const char *fmt, ...) 
{ 4950 va_list args; 4951 va_start(args, fmt); 4952 upb_status_vseterrf(status, fmt, args); 4953 va_end(args); 4954 } 4955 4956 void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) { 4957 if (!status) return; 4958 status->ok_ = false; 4959 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args); 4960 nullz(status); 4961 } 4962 4963 void upb_status_seterrcode(upb_status *status, upb_errorspace *space, 4964 int code) { 4965 if (!status) return; 4966 status->ok_ = false; 4967 status->error_space_ = space; 4968 status->code_ = code; 4969 space->set_message(status, code); 4970 } 4971 4972 void upb_status_copy(upb_status *to, const upb_status *from) { 4973 if (!to) return; 4974 *to = *from; 4975 } 4976 /* This file was generated by upbc (the upb compiler). 4977 * Do not edit -- your changes will be discarded when the file is 4978 * regenerated. */ 4979 4980 4981 static const upb_msgdef msgs[20]; 4982 static const upb_fielddef fields[81]; 4983 static const upb_enumdef enums[4]; 4984 static const upb_tabent strentries[236]; 4985 static const upb_tabent intentries[14]; 4986 static const upb_tabval arrays[232]; 4987 4988 #ifdef UPB_DEBUG_REFS 4989 static upb_inttable reftables[212]; 4990 #endif 4991 4992 static const upb_msgdef msgs[20] = { 4993 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto", 27, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[0], 8, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[0]),&reftables[0], &reftables[1]), 4994 UPB_MSGDEF_INIT("google.protobuf.DescriptorProto.ExtensionRange", 4, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[8], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[16]),&reftables[2], &reftables[3]), 4995 UPB_MSGDEF_INIT("google.protobuf.EnumDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[11], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[20]),&reftables[4], &reftables[5]), 4996 
UPB_MSGDEF_INIT("google.protobuf.EnumOptions", 7, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[0], &arrays[15], 8, 1), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[24]),&reftables[6], &reftables[7]), 4997 UPB_MSGDEF_INIT("google.protobuf.EnumValueDescriptorProto", 8, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[23], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[28]),&reftables[8], &reftables[9]), 4998 UPB_MSGDEF_INIT("google.protobuf.EnumValueOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[2], &arrays[27], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[32]),&reftables[10], &reftables[11]), 4999 UPB_MSGDEF_INIT("google.protobuf.FieldDescriptorProto", 19, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[31], 9, 8), UPB_STRTABLE_INIT(8, 15, UPB_CTYPE_PTR, 4, &strentries[36]),&reftables[12], &reftables[13]), 5000 UPB_MSGDEF_INIT("google.protobuf.FieldOptions", 14, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[4], &arrays[40], 32, 6), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[52]),&reftables[14], &reftables[15]), 5001 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorProto", 39, 6, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[72], 12, 11), UPB_STRTABLE_INIT(11, 15, UPB_CTYPE_PTR, 4, &strentries[68]),&reftables[16], &reftables[17]), 5002 UPB_MSGDEF_INIT("google.protobuf.FileDescriptorSet", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[84], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[84]),&reftables[18], &reftables[19]), 5003 UPB_MSGDEF_INIT("google.protobuf.FileOptions", 21, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[6], &arrays[86], 64, 9), UPB_STRTABLE_INIT(10, 15, UPB_CTYPE_PTR, 4, &strentries[88]),&reftables[20], &reftables[21]), 5004 UPB_MSGDEF_INIT("google.protobuf.MessageOptions", 8, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[8], &arrays[150], 16, 2), UPB_STRTABLE_INIT(3, 3, 
UPB_CTYPE_PTR, 2, &strentries[104]),&reftables[22], &reftables[23]), 5005 UPB_MSGDEF_INIT("google.protobuf.MethodDescriptorProto", 13, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[166], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[108]),&reftables[24], &reftables[25]), 5006 UPB_MSGDEF_INIT("google.protobuf.MethodOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[10], &arrays[171], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[116]),&reftables[26], &reftables[27]), 5007 UPB_MSGDEF_INIT("google.protobuf.ServiceDescriptorProto", 11, 2, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[175], 4, 3), UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_PTR, 2, &strentries[120]),&reftables[28], &reftables[29]), 5008 UPB_MSGDEF_INIT("google.protobuf.ServiceOptions", 6, 1, UPB_INTTABLE_INIT(1, 1, UPB_CTYPE_PTR, 1, &intentries[12], &arrays[179], 4, 0), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[124]),&reftables[30], &reftables[31]), 5009 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo", 6, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[183], 2, 1), UPB_STRTABLE_INIT(1, 3, UPB_CTYPE_PTR, 2, &strentries[128]),&reftables[32], &reftables[33]), 5010 UPB_MSGDEF_INIT("google.protobuf.SourceCodeInfo.Location", 14, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[185], 5, 4), UPB_STRTABLE_INIT(4, 7, UPB_CTYPE_PTR, 3, &strentries[132]),&reftables[34], &reftables[35]), 5011 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption", 18, 1, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[190], 9, 7), UPB_STRTABLE_INIT(7, 15, UPB_CTYPE_PTR, 4, &strentries[140]),&reftables[36], &reftables[37]), 5012 UPB_MSGDEF_INIT("google.protobuf.UninterpretedOption.NamePart", 6, 0, UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_PTR, 0, NULL, &arrays[199], 3, 2), UPB_STRTABLE_INIT(2, 3, UPB_CTYPE_PTR, 2, &strentries[156]),&reftables[38], &reftables[39]), 5013 }; 5014 5015 static const upb_fielddef fields[81] = { 5016 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "aggregate_value", 8, &msgs[18], NULL, 15, 6, {0},&reftables[40], &reftables[41]), 5017 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "allow_alias", 2, &msgs[3], NULL, 6, 1, {0},&reftables[42], &reftables[43]), 5018 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "cc_generic_services", 16, &msgs[10], NULL, 17, 6, {0},&reftables[44], &reftables[45]), 5019 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "ctype", 1, &msgs[7], (const upb_def*)(&enums[2]), 6, 1, {0},&reftables[46], &reftables[47]), 5020 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "default_value", 7, &msgs[6], NULL, 16, 7, {0},&reftables[48], &reftables[49]), 5021 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_STRING, 0, false, false, false, false, "dependency", 3, &msgs[8], NULL, 30, 8, {0},&reftables[50], &reftables[51]), 5022 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "deprecated", 3, &msgs[7], NULL, 8, 3, {0},&reftables[52], &reftables[53]), 5023 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_DOUBLE, 0, false, false, false, false, "double_value", 6, &msgs[18], NULL, 11, 4, {0},&reftables[54], &reftables[55]), 5024 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "end", 2, &msgs[1], NULL, 3, 1, {0},&reftables[56], &reftables[57]), 5025 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 4, &msgs[0], (const upb_def*)(&msgs[2]), 16, 2, {0},&reftables[58], &reftables[59]), 5026 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "enum_type", 5, &msgs[8], (const upb_def*)(&msgs[2]), 13, 1, {0},&reftables[60], &reftables[61]), 5027 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, 
false, false, false, "experimental_map_key", 9, &msgs[7], NULL, 10, 5, {0},&reftables[62], &reftables[63]), 5028 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "extendee", 2, &msgs[6], NULL, 7, 2, {0},&reftables[64], &reftables[65]), 5029 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 7, &msgs[8], (const upb_def*)(&msgs[6]), 19, 3, {0},&reftables[66], &reftables[67]), 5030 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension", 6, &msgs[0], (const upb_def*)(&msgs[6]), 22, 4, {0},&reftables[68], &reftables[69]), 5031 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "extension_range", 5, &msgs[0], (const upb_def*)(&msgs[1]), 19, 3, {0},&reftables[70], &reftables[71]), 5032 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "field", 2, &msgs[0], (const upb_def*)(&msgs[6]), 10, 0, {0},&reftables[72], &reftables[73]), 5033 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "file", 1, &msgs[9], (const upb_def*)(&msgs[8]), 5, 0, {0},&reftables[74], &reftables[75]), 5034 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "go_package", 11, &msgs[10], NULL, 14, 5, {0},&reftables[76], &reftables[77]), 5035 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "identifier_value", 3, &msgs[18], NULL, 6, 1, {0},&reftables[78], &reftables[79]), 5036 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "input_type", 2, &msgs[12], NULL, 7, 2, {0},&reftables[80], &reftables[81]), 5037 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_BOOL, 0, false, false, false, false, "is_extension", 2, &msgs[19], NULL, 5, 1, {0},&reftables[82], &reftables[83]), 5038 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, 
"java_generate_equals_and_hash", 20, &msgs[10], NULL, 20, 9, {0},&reftables[84], &reftables[85]), 5039 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_generic_services", 17, &msgs[10], NULL, 18, 7, {0},&reftables[86], &reftables[87]), 5040 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "java_multiple_files", 10, &msgs[10], NULL, 13, 4, {0},&reftables[88], &reftables[89]), 5041 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_outer_classname", 8, &msgs[10], NULL, 9, 2, {0},&reftables[90], &reftables[91]), 5042 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "java_package", 1, &msgs[10], NULL, 6, 1, {0},&reftables[92], &reftables[93]), 5043 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "label", 4, &msgs[6], (const upb_def*)(&enums[0]), 11, 4, {0},&reftables[94], &reftables[95]), 5044 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "lazy", 5, &msgs[7], NULL, 9, 4, {0},&reftables[96], &reftables[97]), 5045 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "leading_comments", 3, &msgs[17], NULL, 8, 2, {0},&reftables[98], &reftables[99]), 5046 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "location", 1, &msgs[16], (const upb_def*)(&msgs[17]), 5, 0, {0},&reftables[100], &reftables[101]), 5047 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "message_set_wire_format", 1, &msgs[11], NULL, 6, 1, {0},&reftables[102], &reftables[103]), 5048 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "message_type", 4, &msgs[8], (const upb_def*)(&msgs[0]), 10, 0, {0},&reftables[104], &reftables[105]), 5049 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "method", 2, 
&msgs[14], (const upb_def*)(&msgs[12]), 6, 0, {0},&reftables[106], &reftables[107]), 5050 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[8], NULL, 22, 6, {0},&reftables[108], &reftables[109]), 5051 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[14], NULL, 8, 2, {0},&reftables[110], &reftables[111]), 5052 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "name", 2, &msgs[18], (const upb_def*)(&msgs[19]), 5, 0, {0},&reftables[112], &reftables[113]), 5053 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[4], NULL, 4, 1, {0},&reftables[114], &reftables[115]), 5054 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[0], NULL, 24, 6, {0},&reftables[116], &reftables[117]), 5055 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[12], NULL, 4, 1, {0},&reftables[118], &reftables[119]), 5056 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[2], NULL, 8, 2, {0},&reftables[120], &reftables[121]), 5057 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "name", 1, &msgs[6], NULL, 4, 1, {0},&reftables[122], &reftables[123]), 5058 UPB_FIELDDEF_INIT(UPB_LABEL_REQUIRED, UPB_TYPE_STRING, 0, false, false, false, false, "name_part", 1, &msgs[19], NULL, 2, 0, {0},&reftables[124], &reftables[125]), 5059 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT64, UPB_INTFMT_VARIABLE, false, false, false, false, "negative_int_value", 5, &msgs[18], NULL, 10, 3, {0},&reftables[126], &reftables[127]), 5060 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "nested_type", 3, &msgs[0], (const upb_def*)(&msgs[0]), 13, 1, {0},&reftables[128], &reftables[129]), 5061 
UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "no_standard_descriptor_accessor", 2, &msgs[11], NULL, 7, 2, {0},&reftables[130], &reftables[131]), 5062 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 3, &msgs[6], NULL, 10, 3, {0},&reftables[132], &reftables[133]), 5063 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "number", 2, &msgs[4], NULL, 7, 2, {0},&reftables[134], &reftables[135]), 5064 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "optimize_for", 9, &msgs[10], (const upb_def*)(&enums[3]), 12, 3, {0},&reftables[136], &reftables[137]), 5065 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 7, &msgs[0], (const upb_def*)(&msgs[11]), 23, 5, {0},&reftables[138], &reftables[139]), 5066 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[2], (const upb_def*)(&msgs[3]), 7, 1, {0},&reftables[140], &reftables[141]), 5067 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[6], (const upb_def*)(&msgs[7]), 3, 0, {0},&reftables[142], &reftables[143]), 5068 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[4], (const upb_def*)(&msgs[5]), 3, 0, {0},&reftables[144], &reftables[145]), 5069 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 8, &msgs[8], (const upb_def*)(&msgs[10]), 20, 4, {0},&reftables[146], &reftables[147]), 5070 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 3, &msgs[14], (const upb_def*)(&msgs[15]), 7, 1, {0},&reftables[148], &reftables[149]), 5071 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "options", 4, &msgs[12], 
(const upb_def*)(&msgs[13]), 3, 0, {0},&reftables[150], &reftables[151]), 5072 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "output_type", 3, &msgs[12], NULL, 10, 3, {0},&reftables[152], &reftables[153]), 5073 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "package", 2, &msgs[8], NULL, 25, 7, {0},&reftables[154], &reftables[155]), 5074 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "packed", 2, &msgs[7], NULL, 7, 2, {0},&reftables[156], &reftables[157]), 5075 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "path", 1, &msgs[17], NULL, 4, 0, {0},&reftables[158], &reftables[159]), 5076 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_UINT64, UPB_INTFMT_VARIABLE, false, false, false, false, "positive_int_value", 4, &msgs[18], NULL, 9, 2, {0},&reftables[160], &reftables[161]), 5077 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "public_dependency", 10, &msgs[8], NULL, 35, 9, {0},&reftables[162], &reftables[163]), 5078 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "py_generic_services", 18, &msgs[10], NULL, 19, 8, {0},&reftables[164], &reftables[165]), 5079 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "service", 6, &msgs[8], (const upb_def*)(&msgs[14]), 16, 2, {0},&reftables[166], &reftables[167]), 5080 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_MESSAGE, 0, false, false, false, false, "source_code_info", 9, &msgs[8], (const upb_def*)(&msgs[16]), 21, 5, {0},&reftables[168], &reftables[169]), 5081 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, true, "span", 2, &msgs[17], NULL, 7, 1, {0},&reftables[170], &reftables[171]), 5082 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, 
false, "start", 1, &msgs[1], NULL, 2, 0, {0},&reftables[172], &reftables[173]), 5083 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BYTES, 0, false, false, false, false, "string_value", 7, &msgs[18], NULL, 12, 5, {0},&reftables[174], &reftables[175]), 5084 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "trailing_comments", 4, &msgs[17], NULL, 11, 3, {0},&reftables[176], &reftables[177]), 5085 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_ENUM, 0, false, false, false, false, "type", 5, &msgs[6], (const upb_def*)(&enums[1]), 12, 5, {0},&reftables[178], &reftables[179]), 5086 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_STRING, 0, false, false, false, false, "type_name", 6, &msgs[6], NULL, 13, 6, {0},&reftables[180], &reftables[181]), 5087 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[5], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[182], &reftables[183]), 5088 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[15], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[184], &reftables[185]), 5089 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[3], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[186], &reftables[187]), 5090 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[13], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[188], &reftables[189]), 5091 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[10], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[190], &reftables[191]), 5092 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[11], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[192], 
&reftables[193]), 5093 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "uninterpreted_option", 999, &msgs[7], (const upb_def*)(&msgs[18]), 5, 0, {0},&reftables[194], &reftables[195]), 5094 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_MESSAGE, 0, false, false, false, false, "value", 2, &msgs[2], (const upb_def*)(&msgs[4]), 6, 0, {0},&reftables[196], &reftables[197]), 5095 UPB_FIELDDEF_INIT(UPB_LABEL_OPTIONAL, UPB_TYPE_BOOL, 0, false, false, false, false, "weak", 10, &msgs[7], NULL, 13, 6, {0},&reftables[198], &reftables[199]), 5096 UPB_FIELDDEF_INIT(UPB_LABEL_REPEATED, UPB_TYPE_INT32, UPB_INTFMT_VARIABLE, false, false, false, false, "weak_dependency", 11, &msgs[8], NULL, 38, 10, {0},&reftables[200], &reftables[201]), 5097 }; 5098 5099 static const upb_enumdef enums[4] = { 5100 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Label", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[160]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[202], 4, 3), 0, &reftables[202], &reftables[203]), 5101 UPB_ENUMDEF_INIT("google.protobuf.FieldDescriptorProto.Type", UPB_STRTABLE_INIT(18, 31, UPB_CTYPE_INT32, 5, &strentries[164]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[206], 19, 18), 0, &reftables[204], &reftables[205]), 5102 UPB_ENUMDEF_INIT("google.protobuf.FieldOptions.CType", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[196]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[225], 3, 3), 0, &reftables[206], &reftables[207]), 5103 UPB_ENUMDEF_INIT("google.protobuf.FileOptions.OptimizeMode", UPB_STRTABLE_INIT(3, 3, UPB_CTYPE_INT32, 2, &strentries[200]), UPB_INTTABLE_INIT(0, 0, UPB_CTYPE_CSTR, 0, NULL, &arrays[228], 4, 3), 0, &reftables[208], &reftables[209]), 5104 }; 5105 5106 static const upb_tabent strentries[236] = { 5107 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[14]), NULL}, 5108 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 
5109 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5110 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[38]), NULL}, 5111 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5112 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5113 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5114 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "field"), UPB_TABVALUE_PTR_INIT(&fields[16]), NULL}, 5115 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "extension_range"), UPB_TABVALUE_PTR_INIT(&fields[15]), NULL}, 5116 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5117 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "nested_type"), UPB_TABVALUE_PTR_INIT(&fields[44]), NULL}, 5118 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5119 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5120 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5121 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[49]), NULL}, 5122 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[9]), &strentries[14]}, 5123 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "start"), UPB_TABVALUE_PTR_INIT(&fields[66]), NULL}, 5124 {UPB_TABKEY_STR("\003", "\000", "\000", "\000", "end"), UPB_TABVALUE_PTR_INIT(&fields[8]), NULL}, 5125 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5126 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5127 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5128 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "value"), UPB_TABVALUE_PTR_INIT(&fields[78]), NULL}, 5129 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[50]), NULL}, 5130 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[40]), &strentries[22]}, 5131 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL}, 5132 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5133 
{UPB_TABKEY_STR("\013", "\000", "\000", "\000", "allow_alias"), UPB_TABVALUE_PTR_INIT(&fields[1]), NULL}, 5134 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5135 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[47]), NULL}, 5136 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5137 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[52]), NULL}, 5138 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[37]), &strentries[30]}, 5139 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL}, 5140 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5141 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5142 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5143 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5144 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "label"), UPB_TABVALUE_PTR_INIT(&fields[27]), NULL}, 5145 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5146 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[41]), NULL}, 5147 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5148 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5149 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5150 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5151 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "number"), UPB_TABVALUE_PTR_INIT(&fields[46]), &strentries[49]}, 5152 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5153 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5154 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "type_name"), UPB_TABVALUE_PTR_INIT(&fields[70]), NULL}, 5155 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "extendee"), UPB_TABVALUE_PTR_INIT(&fields[12]), NULL}, 5156 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "type"), UPB_TABVALUE_PTR_INIT(&fields[69]), &strentries[48]}, 5157 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", 
"default_value"), UPB_TABVALUE_PTR_INIT(&fields[4]), NULL}, 5158 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[51]), NULL}, 5159 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "experimental_map_key"), UPB_TABVALUE_PTR_INIT(&fields[11]), &strentries[67]}, 5160 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5161 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "weak"), UPB_TABVALUE_PTR_INIT(&fields[79]), NULL}, 5162 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5163 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5164 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5165 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5166 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "packed"), UPB_TABVALUE_PTR_INIT(&fields[58]), NULL}, 5167 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "lazy"), UPB_TABVALUE_PTR_INIT(&fields[28]), NULL}, 5168 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5169 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "ctype"), UPB_TABVALUE_PTR_INIT(&fields[3]), NULL}, 5170 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5171 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5172 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "deprecated"), UPB_TABVALUE_PTR_INIT(&fields[6]), NULL}, 5173 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5174 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL}, 5175 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "extension"), UPB_TABVALUE_PTR_INIT(&fields[13]), NULL}, 5176 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "weak_dependency"), UPB_TABVALUE_PTR_INIT(&fields[80]), NULL}, 5177 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5178 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[34]), NULL}, 5179 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "service"), UPB_TABVALUE_PTR_INIT(&fields[63]), NULL}, 5180 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 
5181 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "source_code_info"), UPB_TABVALUE_PTR_INIT(&fields[64]), NULL}, 5182 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5183 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5184 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5185 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "dependency"), UPB_TABVALUE_PTR_INIT(&fields[5]), NULL}, 5186 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "message_type"), UPB_TABVALUE_PTR_INIT(&fields[32]), NULL}, 5187 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "package"), UPB_TABVALUE_PTR_INIT(&fields[57]), NULL}, 5188 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[53]), &strentries[82]}, 5189 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "enum_type"), UPB_TABVALUE_PTR_INIT(&fields[10]), NULL}, 5190 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "public_dependency"), UPB_TABVALUE_PTR_INIT(&fields[61]), &strentries[81]}, 5191 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5192 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "file"), UPB_TABVALUE_PTR_INIT(&fields[17]), NULL}, 5193 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5194 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5195 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL}, 5196 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5197 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "cc_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[2]), NULL}, 5198 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5199 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "java_multiple_files"), UPB_TABVALUE_PTR_INIT(&fields[24]), NULL}, 5200 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5201 {UPB_TABKEY_STR("\025", "\000", "\000", "\000", "java_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[23]), &strentries[102]}, 5202 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "java_generate_equals_and_hash"), 
UPB_TABVALUE_PTR_INIT(&fields[22]), NULL}, 5203 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5204 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5205 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5206 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "go_package"), UPB_TABVALUE_PTR_INIT(&fields[18]), NULL}, 5207 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "java_package"), UPB_TABVALUE_PTR_INIT(&fields[26]), NULL}, 5208 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "optimize_for"), UPB_TABVALUE_PTR_INIT(&fields[48]), NULL}, 5209 {UPB_TABKEY_STR("\023", "\000", "\000", "\000", "py_generic_services"), UPB_TABVALUE_PTR_INIT(&fields[62]), NULL}, 5210 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "java_outer_classname"), UPB_TABVALUE_PTR_INIT(&fields[25]), NULL}, 5211 {UPB_TABKEY_STR("\027", "\000", "\000", "\000", "message_set_wire_format"), UPB_TABVALUE_PTR_INIT(&fields[31]), &strentries[106]}, 5212 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5213 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL}, 5214 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "no_standard_descriptor_accessor"), UPB_TABVALUE_PTR_INIT(&fields[45]), NULL}, 5215 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5216 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5217 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5218 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[39]), NULL}, 5219 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "input_type"), UPB_TABVALUE_PTR_INIT(&fields[20]), NULL}, 5220 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5221 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "output_type"), UPB_TABVALUE_PTR_INIT(&fields[56]), NULL}, 5222 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[55]), NULL}, 5223 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[74]), 
NULL}, 5224 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5225 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5226 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5227 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5228 {UPB_TABKEY_STR("\007", "\000", "\000", "\000", "options"), UPB_TABVALUE_PTR_INIT(&fields[54]), &strentries[122]}, 5229 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "method"), UPB_TABVALUE_PTR_INIT(&fields[33]), NULL}, 5230 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[35]), &strentries[121]}, 5231 {UPB_TABKEY_STR("\024", "\000", "\000", "\000", "uninterpreted_option"), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL}, 5232 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5233 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5234 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5235 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5236 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5237 {UPB_TABKEY_STR("\010", "\000", "\000", "\000", "location"), UPB_TABVALUE_PTR_INIT(&fields[30]), NULL}, 5238 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5239 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5240 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5241 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5242 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "span"), UPB_TABVALUE_PTR_INIT(&fields[65]), &strentries[139]}, 5243 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5244 {UPB_TABKEY_STR("\021", "\000", "\000", "\000", "trailing_comments"), UPB_TABVALUE_PTR_INIT(&fields[68]), NULL}, 5245 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "leading_comments"), UPB_TABVALUE_PTR_INIT(&fields[29]), &strentries[137]}, 5246 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "path"), UPB_TABVALUE_PTR_INIT(&fields[59]), NULL}, 5247 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "double_value"), UPB_TABVALUE_PTR_INIT(&fields[7]), NULL}, 5248 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5249 
{UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5250 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "name"), UPB_TABVALUE_PTR_INIT(&fields[36]), NULL}, 5251 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5252 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5253 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5254 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "negative_int_value"), UPB_TABVALUE_PTR_INIT(&fields[43]), NULL}, 5255 {UPB_TABKEY_STR("\017", "\000", "\000", "\000", "aggregate_value"), UPB_TABVALUE_PTR_INIT(&fields[0]), NULL}, 5256 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5257 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5258 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5259 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5260 {UPB_TABKEY_STR("\022", "\000", "\000", "\000", "positive_int_value"), UPB_TABVALUE_PTR_INIT(&fields[60]), NULL}, 5261 {UPB_TABKEY_STR("\020", "\000", "\000", "\000", "identifier_value"), UPB_TABVALUE_PTR_INIT(&fields[19]), NULL}, 5262 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "string_value"), UPB_TABVALUE_PTR_INIT(&fields[67]), &strentries[154]}, 5263 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5264 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5265 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "is_extension"), UPB_TABVALUE_PTR_INIT(&fields[21]), NULL}, 5266 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "name_part"), UPB_TABVALUE_PTR_INIT(&fields[42]), NULL}, 5267 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REQUIRED"), UPB_TABVALUE_INT_INIT(2), &strentries[162]}, 5268 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5269 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_REPEATED"), UPB_TABVALUE_INT_INIT(3), NULL}, 5270 {UPB_TABKEY_STR("\016", "\000", "\000", "\000", "LABEL_OPTIONAL"), UPB_TABVALUE_INT_INIT(1), NULL}, 5271 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED64"), UPB_TABVALUE_INT_INIT(6), NULL}, 5272 {UPB_TABKEY_NONE, 
UPB_TABVALUE_EMPTY_INIT, NULL}, 5273 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5274 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5275 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5276 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_STRING"), UPB_TABVALUE_INT_INIT(9), NULL}, 5277 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_FLOAT"), UPB_TABVALUE_INT_INIT(2), &strentries[193]}, 5278 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_DOUBLE"), UPB_TABVALUE_INT_INIT(1), NULL}, 5279 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5280 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT32"), UPB_TABVALUE_INT_INIT(5), NULL}, 5281 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED32"), UPB_TABVALUE_INT_INIT(15), NULL}, 5282 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_FIXED32"), UPB_TABVALUE_INT_INIT(7), NULL}, 5283 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5284 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "TYPE_MESSAGE"), UPB_TABVALUE_INT_INIT(11), &strentries[194]}, 5285 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5286 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5287 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_INT64"), UPB_TABVALUE_INT_INIT(3), &strentries[191]}, 5288 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5289 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5290 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5291 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5292 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_ENUM"), UPB_TABVALUE_INT_INIT(14), NULL}, 5293 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT32"), UPB_TABVALUE_INT_INIT(13), NULL}, 5294 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5295 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_UINT64"), UPB_TABVALUE_INT_INIT(4), &strentries[190]}, 5296 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5297 {UPB_TABKEY_STR("\015", "\000", "\000", "\000", "TYPE_SFIXED64"), 
UPB_TABVALUE_INT_INIT(16), NULL}, 5298 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_BYTES"), UPB_TABVALUE_INT_INIT(12), NULL}, 5299 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT64"), UPB_TABVALUE_INT_INIT(18), NULL}, 5300 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "TYPE_BOOL"), UPB_TABVALUE_INT_INIT(8), NULL}, 5301 {UPB_TABKEY_STR("\012", "\000", "\000", "\000", "TYPE_GROUP"), UPB_TABVALUE_INT_INIT(10), NULL}, 5302 {UPB_TABKEY_STR("\013", "\000", "\000", "\000", "TYPE_SINT32"), UPB_TABVALUE_INT_INIT(17), NULL}, 5303 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5304 {UPB_TABKEY_STR("\004", "\000", "\000", "\000", "CORD"), UPB_TABVALUE_INT_INIT(1), NULL}, 5305 {UPB_TABKEY_STR("\006", "\000", "\000", "\000", "STRING"), UPB_TABVALUE_INT_INIT(0), &strentries[197]}, 5306 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "STRING_PIECE"), UPB_TABVALUE_INT_INIT(2), NULL}, 5307 {UPB_TABKEY_STR("\011", "\000", "\000", "\000", "CODE_SIZE"), UPB_TABVALUE_INT_INIT(2), NULL}, 5308 {UPB_TABKEY_STR("\005", "\000", "\000", "\000", "SPEED"), UPB_TABVALUE_INT_INIT(1), &strentries[203]}, 5309 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5310 {UPB_TABKEY_STR("\014", "\000", "\000", "\000", "LITE_RUNTIME"), UPB_TABVALUE_INT_INIT(3), NULL}, 5311 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5312 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5313 {UPB_TABKEY_STR("\047", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo.Location"), UPB_TABVALUE_PTR_INIT(&msgs[17]), NULL}, 5314 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.UninterpretedOption"), UPB_TABVALUE_PTR_INIT(&msgs[18]), NULL}, 5315 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.FileDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[8]), NULL}, 5316 {UPB_TABKEY_STR("\045", "\000", "\000", "\000", "google.protobuf.MethodDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[12]), NULL}, 5317 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5318 
{UPB_TABKEY_STR("\040", "\000", "\000", "\000", "google.protobuf.EnumValueOptions"), UPB_TABVALUE_PTR_INIT(&msgs[5]), NULL}, 5319 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5320 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5321 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5322 {UPB_TABKEY_STR("\037", "\000", "\000", "\000", "google.protobuf.DescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[0]), &strentries[228]}, 5323 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5324 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.SourceCodeInfo"), UPB_TABVALUE_PTR_INIT(&msgs[16]), NULL}, 5325 {UPB_TABKEY_STR("\051", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Type"), UPB_TABVALUE_PTR_INIT(&enums[1]), NULL}, 5326 {UPB_TABKEY_STR("\056", "\000", "\000", "\000", "google.protobuf.DescriptorProto.ExtensionRange"), UPB_TABVALUE_PTR_INIT(&msgs[1]), NULL}, 5327 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5328 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.EnumValueDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[4]), NULL}, 5329 {UPB_TABKEY_STR("\034", "\000", "\000", "\000", "google.protobuf.FieldOptions"), UPB_TABVALUE_PTR_INIT(&msgs[7]), NULL}, 5330 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.FileOptions"), UPB_TABVALUE_PTR_INIT(&msgs[10]), NULL}, 5331 {UPB_TABKEY_STR("\043", "\000", "\000", "\000", "google.protobuf.EnumDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[2]), &strentries[233]}, 5332 {UPB_TABKEY_STR("\052", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto.Label"), UPB_TABVALUE_PTR_INIT(&enums[0]), NULL}, 5333 {UPB_TABKEY_STR("\046", "\000", "\000", "\000", "google.protobuf.ServiceDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[14]), NULL}, 5334 {UPB_TABKEY_STR("\042", "\000", "\000", "\000", "google.protobuf.FieldOptions.CType"), UPB_TABVALUE_PTR_INIT(&enums[2]), &strentries[229]}, 5335 {UPB_TABKEY_STR("\041", "\000", "\000", "\000", 
"google.protobuf.FileDescriptorSet"), UPB_TABVALUE_PTR_INIT(&msgs[9]), &strentries[235]}, 5336 {UPB_TABKEY_STR("\033", "\000", "\000", "\000", "google.protobuf.EnumOptions"), UPB_TABVALUE_PTR_INIT(&msgs[3]), NULL}, 5337 {UPB_TABKEY_STR("\044", "\000", "\000", "\000", "google.protobuf.FieldDescriptorProto"), UPB_TABVALUE_PTR_INIT(&msgs[6]), NULL}, 5338 {UPB_TABKEY_STR("\050", "\000", "\000", "\000", "google.protobuf.FileOptions.OptimizeMode"), UPB_TABVALUE_PTR_INIT(&enums[3]), &strentries[221]}, 5339 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.ServiceOptions"), UPB_TABVALUE_PTR_INIT(&msgs[15]), NULL}, 5340 {UPB_TABKEY_STR("\036", "\000", "\000", "\000", "google.protobuf.MessageOptions"), UPB_TABVALUE_PTR_INIT(&msgs[11]), NULL}, 5341 {UPB_TABKEY_STR("\035", "\000", "\000", "\000", "google.protobuf.MethodOptions"), UPB_TABVALUE_PTR_INIT(&msgs[13]), &strentries[226]}, 5342 {UPB_TABKEY_STR("\054", "\000", "\000", "\000", "google.protobuf.UninterpretedOption.NamePart"), UPB_TABVALUE_PTR_INIT(&msgs[19]), NULL}, 5343 }; 5344 5345 static const upb_tabent intentries[14] = { 5346 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5347 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[73]), NULL}, 5348 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5349 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[71]), NULL}, 5350 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5351 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[77]), NULL}, 5352 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5353 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[75]), NULL}, 5354 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5355 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[76]), NULL}, 5356 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5357 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[74]), NULL}, 5358 {UPB_TABKEY_NONE, UPB_TABVALUE_EMPTY_INIT, NULL}, 5359 {UPB_TABKEY_NUM(999), UPB_TABVALUE_PTR_INIT(&fields[72]), NULL}, 5360 }; 5361 5362 
static const upb_tabval arrays[232] = { 5363 UPB_TABVALUE_EMPTY_INIT, 5364 UPB_TABVALUE_PTR_INIT(&fields[38]), 5365 UPB_TABVALUE_PTR_INIT(&fields[16]), 5366 UPB_TABVALUE_PTR_INIT(&fields[44]), 5367 UPB_TABVALUE_PTR_INIT(&fields[9]), 5368 UPB_TABVALUE_PTR_INIT(&fields[15]), 5369 UPB_TABVALUE_PTR_INIT(&fields[14]), 5370 UPB_TABVALUE_PTR_INIT(&fields[49]), 5371 UPB_TABVALUE_EMPTY_INIT, 5372 UPB_TABVALUE_PTR_INIT(&fields[66]), 5373 UPB_TABVALUE_PTR_INIT(&fields[8]), 5374 UPB_TABVALUE_EMPTY_INIT, 5375 UPB_TABVALUE_PTR_INIT(&fields[40]), 5376 UPB_TABVALUE_PTR_INIT(&fields[78]), 5377 UPB_TABVALUE_PTR_INIT(&fields[50]), 5378 UPB_TABVALUE_EMPTY_INIT, 5379 UPB_TABVALUE_EMPTY_INIT, 5380 UPB_TABVALUE_PTR_INIT(&fields[1]), 5381 UPB_TABVALUE_EMPTY_INIT, 5382 UPB_TABVALUE_EMPTY_INIT, 5383 UPB_TABVALUE_EMPTY_INIT, 5384 UPB_TABVALUE_EMPTY_INIT, 5385 UPB_TABVALUE_EMPTY_INIT, 5386 UPB_TABVALUE_EMPTY_INIT, 5387 UPB_TABVALUE_PTR_INIT(&fields[37]), 5388 UPB_TABVALUE_PTR_INIT(&fields[47]), 5389 UPB_TABVALUE_PTR_INIT(&fields[52]), 5390 UPB_TABVALUE_EMPTY_INIT, 5391 UPB_TABVALUE_EMPTY_INIT, 5392 UPB_TABVALUE_EMPTY_INIT, 5393 UPB_TABVALUE_EMPTY_INIT, 5394 UPB_TABVALUE_EMPTY_INIT, 5395 UPB_TABVALUE_PTR_INIT(&fields[41]), 5396 UPB_TABVALUE_PTR_INIT(&fields[12]), 5397 UPB_TABVALUE_PTR_INIT(&fields[46]), 5398 UPB_TABVALUE_PTR_INIT(&fields[27]), 5399 UPB_TABVALUE_PTR_INIT(&fields[69]), 5400 UPB_TABVALUE_PTR_INIT(&fields[70]), 5401 UPB_TABVALUE_PTR_INIT(&fields[4]), 5402 UPB_TABVALUE_PTR_INIT(&fields[51]), 5403 UPB_TABVALUE_EMPTY_INIT, 5404 UPB_TABVALUE_PTR_INIT(&fields[3]), 5405 UPB_TABVALUE_PTR_INIT(&fields[58]), 5406 UPB_TABVALUE_PTR_INIT(&fields[6]), 5407 UPB_TABVALUE_EMPTY_INIT, 5408 UPB_TABVALUE_PTR_INIT(&fields[28]), 5409 UPB_TABVALUE_EMPTY_INIT, 5410 UPB_TABVALUE_EMPTY_INIT, 5411 UPB_TABVALUE_EMPTY_INIT, 5412 UPB_TABVALUE_PTR_INIT(&fields[11]), 5413 UPB_TABVALUE_PTR_INIT(&fields[79]), 5414 UPB_TABVALUE_EMPTY_INIT, 5415 UPB_TABVALUE_EMPTY_INIT, 5416 UPB_TABVALUE_EMPTY_INIT, 5417 
UPB_TABVALUE_EMPTY_INIT, 5418 UPB_TABVALUE_EMPTY_INIT, 5419 UPB_TABVALUE_EMPTY_INIT, 5420 UPB_TABVALUE_EMPTY_INIT, 5421 UPB_TABVALUE_EMPTY_INIT, 5422 UPB_TABVALUE_EMPTY_INIT, 5423 UPB_TABVALUE_EMPTY_INIT, 5424 UPB_TABVALUE_EMPTY_INIT, 5425 UPB_TABVALUE_EMPTY_INIT, 5426 UPB_TABVALUE_EMPTY_INIT, 5427 UPB_TABVALUE_EMPTY_INIT, 5428 UPB_TABVALUE_EMPTY_INIT, 5429 UPB_TABVALUE_EMPTY_INIT, 5430 UPB_TABVALUE_EMPTY_INIT, 5431 UPB_TABVALUE_EMPTY_INIT, 5432 UPB_TABVALUE_EMPTY_INIT, 5433 UPB_TABVALUE_EMPTY_INIT, 5434 UPB_TABVALUE_EMPTY_INIT, 5435 UPB_TABVALUE_EMPTY_INIT, 5436 UPB_TABVALUE_PTR_INIT(&fields[34]), 5437 UPB_TABVALUE_PTR_INIT(&fields[57]), 5438 UPB_TABVALUE_PTR_INIT(&fields[5]), 5439 UPB_TABVALUE_PTR_INIT(&fields[32]), 5440 UPB_TABVALUE_PTR_INIT(&fields[10]), 5441 UPB_TABVALUE_PTR_INIT(&fields[63]), 5442 UPB_TABVALUE_PTR_INIT(&fields[13]), 5443 UPB_TABVALUE_PTR_INIT(&fields[53]), 5444 UPB_TABVALUE_PTR_INIT(&fields[64]), 5445 UPB_TABVALUE_PTR_INIT(&fields[61]), 5446 UPB_TABVALUE_PTR_INIT(&fields[80]), 5447 UPB_TABVALUE_EMPTY_INIT, 5448 UPB_TABVALUE_PTR_INIT(&fields[17]), 5449 UPB_TABVALUE_EMPTY_INIT, 5450 UPB_TABVALUE_PTR_INIT(&fields[26]), 5451 UPB_TABVALUE_EMPTY_INIT, 5452 UPB_TABVALUE_EMPTY_INIT, 5453 UPB_TABVALUE_EMPTY_INIT, 5454 UPB_TABVALUE_EMPTY_INIT, 5455 UPB_TABVALUE_EMPTY_INIT, 5456 UPB_TABVALUE_EMPTY_INIT, 5457 UPB_TABVALUE_PTR_INIT(&fields[25]), 5458 UPB_TABVALUE_PTR_INIT(&fields[48]), 5459 UPB_TABVALUE_PTR_INIT(&fields[24]), 5460 UPB_TABVALUE_PTR_INIT(&fields[18]), 5461 UPB_TABVALUE_EMPTY_INIT, 5462 UPB_TABVALUE_EMPTY_INIT, 5463 UPB_TABVALUE_EMPTY_INIT, 5464 UPB_TABVALUE_EMPTY_INIT, 5465 UPB_TABVALUE_PTR_INIT(&fields[2]), 5466 UPB_TABVALUE_PTR_INIT(&fields[23]), 5467 UPB_TABVALUE_PTR_INIT(&fields[62]), 5468 UPB_TABVALUE_EMPTY_INIT, 5469 UPB_TABVALUE_PTR_INIT(&fields[22]), 5470 UPB_TABVALUE_EMPTY_INIT, 5471 UPB_TABVALUE_EMPTY_INIT, 5472 UPB_TABVALUE_EMPTY_INIT, 5473 UPB_TABVALUE_EMPTY_INIT, 5474 UPB_TABVALUE_EMPTY_INIT, 5475 UPB_TABVALUE_EMPTY_INIT, 5476 
UPB_TABVALUE_EMPTY_INIT, 5477 UPB_TABVALUE_EMPTY_INIT, 5478 UPB_TABVALUE_EMPTY_INIT, 5479 UPB_TABVALUE_EMPTY_INIT, 5480 UPB_TABVALUE_EMPTY_INIT, 5481 UPB_TABVALUE_EMPTY_INIT, 5482 UPB_TABVALUE_EMPTY_INIT, 5483 UPB_TABVALUE_EMPTY_INIT, 5484 UPB_TABVALUE_EMPTY_INIT, 5485 UPB_TABVALUE_EMPTY_INIT, 5486 UPB_TABVALUE_EMPTY_INIT, 5487 UPB_TABVALUE_EMPTY_INIT, 5488 UPB_TABVALUE_EMPTY_INIT, 5489 UPB_TABVALUE_EMPTY_INIT, 5490 UPB_TABVALUE_EMPTY_INIT, 5491 UPB_TABVALUE_EMPTY_INIT, 5492 UPB_TABVALUE_EMPTY_INIT, 5493 UPB_TABVALUE_EMPTY_INIT, 5494 UPB_TABVALUE_EMPTY_INIT, 5495 UPB_TABVALUE_EMPTY_INIT, 5496 UPB_TABVALUE_EMPTY_INIT, 5497 UPB_TABVALUE_EMPTY_INIT, 5498 UPB_TABVALUE_EMPTY_INIT, 5499 UPB_TABVALUE_EMPTY_INIT, 5500 UPB_TABVALUE_EMPTY_INIT, 5501 UPB_TABVALUE_EMPTY_INIT, 5502 UPB_TABVALUE_EMPTY_INIT, 5503 UPB_TABVALUE_EMPTY_INIT, 5504 UPB_TABVALUE_EMPTY_INIT, 5505 UPB_TABVALUE_EMPTY_INIT, 5506 UPB_TABVALUE_EMPTY_INIT, 5507 UPB_TABVALUE_EMPTY_INIT, 5508 UPB_TABVALUE_EMPTY_INIT, 5509 UPB_TABVALUE_EMPTY_INIT, 5510 UPB_TABVALUE_EMPTY_INIT, 5511 UPB_TABVALUE_EMPTY_INIT, 5512 UPB_TABVALUE_EMPTY_INIT, 5513 UPB_TABVALUE_EMPTY_INIT, 5514 UPB_TABVALUE_PTR_INIT(&fields[31]), 5515 UPB_TABVALUE_PTR_INIT(&fields[45]), 5516 UPB_TABVALUE_EMPTY_INIT, 5517 UPB_TABVALUE_EMPTY_INIT, 5518 UPB_TABVALUE_EMPTY_INIT, 5519 UPB_TABVALUE_EMPTY_INIT, 5520 UPB_TABVALUE_EMPTY_INIT, 5521 UPB_TABVALUE_EMPTY_INIT, 5522 UPB_TABVALUE_EMPTY_INIT, 5523 UPB_TABVALUE_EMPTY_INIT, 5524 UPB_TABVALUE_EMPTY_INIT, 5525 UPB_TABVALUE_EMPTY_INIT, 5526 UPB_TABVALUE_EMPTY_INIT, 5527 UPB_TABVALUE_EMPTY_INIT, 5528 UPB_TABVALUE_EMPTY_INIT, 5529 UPB_TABVALUE_EMPTY_INIT, 5530 UPB_TABVALUE_PTR_INIT(&fields[39]), 5531 UPB_TABVALUE_PTR_INIT(&fields[20]), 5532 UPB_TABVALUE_PTR_INIT(&fields[56]), 5533 UPB_TABVALUE_PTR_INIT(&fields[55]), 5534 UPB_TABVALUE_EMPTY_INIT, 5535 UPB_TABVALUE_EMPTY_INIT, 5536 UPB_TABVALUE_EMPTY_INIT, 5537 UPB_TABVALUE_EMPTY_INIT, 5538 UPB_TABVALUE_EMPTY_INIT, 5539 UPB_TABVALUE_PTR_INIT(&fields[35]), 5540 
UPB_TABVALUE_PTR_INIT(&fields[33]), 5541 UPB_TABVALUE_PTR_INIT(&fields[54]), 5542 UPB_TABVALUE_EMPTY_INIT, 5543 UPB_TABVALUE_EMPTY_INIT, 5544 UPB_TABVALUE_EMPTY_INIT, 5545 UPB_TABVALUE_EMPTY_INIT, 5546 UPB_TABVALUE_EMPTY_INIT, 5547 UPB_TABVALUE_PTR_INIT(&fields[30]), 5548 UPB_TABVALUE_EMPTY_INIT, 5549 UPB_TABVALUE_PTR_INIT(&fields[59]), 5550 UPB_TABVALUE_PTR_INIT(&fields[65]), 5551 UPB_TABVALUE_PTR_INIT(&fields[29]), 5552 UPB_TABVALUE_PTR_INIT(&fields[68]), 5553 UPB_TABVALUE_EMPTY_INIT, 5554 UPB_TABVALUE_EMPTY_INIT, 5555 UPB_TABVALUE_PTR_INIT(&fields[36]), 5556 UPB_TABVALUE_PTR_INIT(&fields[19]), 5557 UPB_TABVALUE_PTR_INIT(&fields[60]), 5558 UPB_TABVALUE_PTR_INIT(&fields[43]), 5559 UPB_TABVALUE_PTR_INIT(&fields[7]), 5560 UPB_TABVALUE_PTR_INIT(&fields[67]), 5561 UPB_TABVALUE_PTR_INIT(&fields[0]), 5562 UPB_TABVALUE_EMPTY_INIT, 5563 UPB_TABVALUE_PTR_INIT(&fields[42]), 5564 UPB_TABVALUE_PTR_INIT(&fields[21]), 5565 UPB_TABVALUE_EMPTY_INIT, 5566 UPB_TABVALUE_PTR_INIT("LABEL_OPTIONAL"), 5567 UPB_TABVALUE_PTR_INIT("LABEL_REQUIRED"), 5568 UPB_TABVALUE_PTR_INIT("LABEL_REPEATED"), 5569 UPB_TABVALUE_EMPTY_INIT, 5570 UPB_TABVALUE_PTR_INIT("TYPE_DOUBLE"), 5571 UPB_TABVALUE_PTR_INIT("TYPE_FLOAT"), 5572 UPB_TABVALUE_PTR_INIT("TYPE_INT64"), 5573 UPB_TABVALUE_PTR_INIT("TYPE_UINT64"), 5574 UPB_TABVALUE_PTR_INIT("TYPE_INT32"), 5575 UPB_TABVALUE_PTR_INIT("TYPE_FIXED64"), 5576 UPB_TABVALUE_PTR_INIT("TYPE_FIXED32"), 5577 UPB_TABVALUE_PTR_INIT("TYPE_BOOL"), 5578 UPB_TABVALUE_PTR_INIT("TYPE_STRING"), 5579 UPB_TABVALUE_PTR_INIT("TYPE_GROUP"), 5580 UPB_TABVALUE_PTR_INIT("TYPE_MESSAGE"), 5581 UPB_TABVALUE_PTR_INIT("TYPE_BYTES"), 5582 UPB_TABVALUE_PTR_INIT("TYPE_UINT32"), 5583 UPB_TABVALUE_PTR_INIT("TYPE_ENUM"), 5584 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED32"), 5585 UPB_TABVALUE_PTR_INIT("TYPE_SFIXED64"), 5586 UPB_TABVALUE_PTR_INIT("TYPE_SINT32"), 5587 UPB_TABVALUE_PTR_INIT("TYPE_SINT64"), 5588 UPB_TABVALUE_PTR_INIT("STRING"), 5589 UPB_TABVALUE_PTR_INIT("CORD"), 5590 
UPB_TABVALUE_PTR_INIT("STRING_PIECE"), 5591 UPB_TABVALUE_EMPTY_INIT, 5592 UPB_TABVALUE_PTR_INIT("SPEED"), 5593 UPB_TABVALUE_PTR_INIT("CODE_SIZE"), 5594 UPB_TABVALUE_PTR_INIT("LITE_RUNTIME"), 5595 }; 5596 5597 static const upb_symtab symtab = UPB_SYMTAB_INIT(UPB_STRTABLE_INIT(24, 31, UPB_CTYPE_PTR, 5, &strentries[204]), &reftables[210], &reftables[211]); 5598 5599 const upb_symtab *upbdefs_google_protobuf_descriptor(const void *owner) { 5600 upb_symtab_ref(&symtab, owner); 5601 return &symtab; 5602 } 5603 5604 #ifdef UPB_DEBUG_REFS 5605 static upb_inttable reftables[212] = { 5606 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5607 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5608 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5609 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5610 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5611 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5612 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5613 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5614 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5615 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5616 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5617 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5618 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5619 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5620 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5621 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5622 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5623 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5624 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5625 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5626 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5627 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5628 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5629 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5630 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5631 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5632 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5633 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5634 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5635 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5636 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5637 
UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5638 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5639 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5640 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5641 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5642 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5643 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5644 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5645 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5646 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5647 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5648 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5649 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5650 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5651 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5652 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5653 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5654 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5655 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5656 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5657 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5658 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5659 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5660 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5661 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5662 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5663 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5664 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5665 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5666 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5667 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5668 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5669 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5670 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5671 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5672 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5673 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5674 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5675 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5676 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5677 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5678 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5679 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5680 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5681 
UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5682 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5683 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5684 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5685 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5686 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5687 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5688 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5689 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5690 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5691 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5692 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5693 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5694 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5695 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5696 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5697 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5698 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5699 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5700 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5701 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5702 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5703 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5704 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5705 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5706 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5707 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5708 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5709 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5710 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5711 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5712 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5713 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5714 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5715 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5716 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5717 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5718 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5719 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5720 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5721 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5722 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5723 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5724 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5725 
UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5726 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5727 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5728 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5729 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5730 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5731 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5732 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5733 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5734 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5735 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5736 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5737 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5738 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5739 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5740 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5741 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5742 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5743 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5744 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5745 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5746 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5747 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5748 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5749 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5750 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5751 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5752 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5753 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5754 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5755 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5756 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5757 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5758 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5759 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5760 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5761 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5762 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5763 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5764 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5765 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5766 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5767 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5768 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5769 
UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5770 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5771 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5772 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5773 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5774 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5775 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5776 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5777 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5778 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5779 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5780 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5781 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5782 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5783 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5784 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5785 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5786 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5787 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5788 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5789 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5790 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5791 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5792 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5793 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5794 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5795 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5796 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5797 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5798 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5799 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5800 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5801 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5802 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5803 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5804 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5805 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5806 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5807 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5808 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5809 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5810 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5811 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5812 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5813 
UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5814 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5815 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5816 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5817 UPB_EMPTY_INTTABLE_INIT(UPB_CTYPE_PTR), 5818 }; 5819 #endif 5820 5821 /* 5822 ** XXX: The routines in this file that consume a string do not currently 5823 ** support having the string span buffers. In the future, as upb_sink and 5824 ** its buffering/sharing functionality evolve there should be an easy and 5825 ** idiomatic way of correctly handling this case. For now, we accept this 5826 ** limitation since we currently only parse descriptors from single strings. 5827 */ 5828 5829 5830 #include <errno.h> 5831 #include <stdlib.h> 5832 #include <string.h> 5833 5834 /* upb_deflist is an internal-only dynamic array for storing a growing list of 5835 * upb_defs. */ 5836 typedef struct { 5837 upb_def **defs; 5838 size_t len; 5839 size_t size; 5840 bool owned; 5841 } upb_deflist; 5842 5843 /* We keep a stack of all the messages scopes we are currently in, as well as 5844 * the top-level file scope. This is necessary to correctly qualify the 5845 * definitions that are contained inside. "name" tracks the name of the 5846 * message or package (a bare name -- not qualified by any enclosing scopes). */ 5847 typedef struct { 5848 char *name; 5849 /* Index of the first def that is under this scope. For msgdefs, the 5850 * msgdef itself is at start-1. */ 5851 int start; 5852 } upb_descreader_frame; 5853 5854 /* The maximum number of nested declarations that are allowed, ie. 5855 * message Foo { 5856 * message Bar { 5857 * message Baz { 5858 * } 5859 * } 5860 * } 5861 * 5862 * This is a resource limit that affects how big our runtime stack can grow. 5863 * TODO: make this a runtime-settable property of the Reader instance. 
*/ 5864 #define UPB_MAX_MESSAGE_NESTING 64 5865 5866 struct upb_descreader { 5867 upb_sink sink; 5868 upb_deflist defs; 5869 upb_descreader_frame stack[UPB_MAX_MESSAGE_NESTING]; 5870 int stack_len; 5871 5872 uint32_t number; 5873 char *name; 5874 bool saw_number; 5875 bool saw_name; 5876 5877 char *default_string; 5878 5879 upb_fielddef *f; 5880 }; 5881 5882 static char *upb_strndup(const char *buf, size_t n) { 5883 char *ret = malloc(n + 1); 5884 if (!ret) return NULL; 5885 memcpy(ret, buf, n); 5886 ret[n] = '\0'; 5887 return ret; 5888 } 5889 5890 /* Returns a newly allocated string that joins input strings together, for 5891 * example: 5892 * join("Foo.Bar", "Baz") -> "Foo.Bar.Baz" 5893 * join("", "Baz") -> "Baz" 5894 * Caller owns a ref on the returned string. */ 5895 static char *upb_join(const char *base, const char *name) { 5896 if (!base || strlen(base) == 0) { 5897 return upb_strdup(name); 5898 } else { 5899 char *ret = malloc(strlen(base) + strlen(name) + 2); 5900 ret[0] = '\0'; 5901 strcat(ret, base); 5902 strcat(ret, "."); 5903 strcat(ret, name); 5904 return ret; 5905 } 5906 } 5907 5908 5909 /* upb_deflist ****************************************************************/ 5910 5911 void upb_deflist_init(upb_deflist *l) { 5912 l->size = 0; 5913 l->defs = NULL; 5914 l->len = 0; 5915 l->owned = true; 5916 } 5917 5918 void upb_deflist_uninit(upb_deflist *l) { 5919 size_t i; 5920 if (l->owned) 5921 for(i = 0; i < l->len; i++) 5922 upb_def_unref(l->defs[i], l); 5923 free(l->defs); 5924 } 5925 5926 bool upb_deflist_push(upb_deflist *l, upb_def *d) { 5927 if(++l->len >= l->size) { 5928 size_t new_size = UPB_MAX(l->size, 4); 5929 new_size *= 2; 5930 l->defs = realloc(l->defs, new_size * sizeof(void *)); 5931 if (!l->defs) return false; 5932 l->size = new_size; 5933 } 5934 l->defs[l->len - 1] = d; 5935 return true; 5936 } 5937 5938 void upb_deflist_donaterefs(upb_deflist *l, void *owner) { 5939 size_t i; 5940 assert(l->owned); 5941 for (i = 0; i < l->len; i++) 5942 
upb_def_donateref(l->defs[i], l, owner); 5943 l->owned = false; 5944 } 5945 5946 static upb_def *upb_deflist_last(upb_deflist *l) { 5947 return l->defs[l->len-1]; 5948 } 5949 5950 /* Qualify the defname for all defs starting with offset "start" with "str". */ 5951 static void upb_deflist_qualify(upb_deflist *l, char *str, int32_t start) { 5952 uint32_t i; 5953 for (i = start; i < l->len; i++) { 5954 upb_def *def = l->defs[i]; 5955 char *name = upb_join(str, upb_def_fullname(def)); 5956 upb_def_setfullname(def, name, NULL); 5957 free(name); 5958 } 5959 } 5960 5961 5962 /* upb_descreader ************************************************************/ 5963 5964 static upb_msgdef *upb_descreader_top(upb_descreader *r) { 5965 int index; 5966 assert(r->stack_len > 1); 5967 index = r->stack[r->stack_len-1].start - 1; 5968 assert(index >= 0); 5969 return upb_downcast_msgdef_mutable(r->defs.defs[index]); 5970 } 5971 5972 static upb_def *upb_descreader_last(upb_descreader *r) { 5973 return upb_deflist_last(&r->defs); 5974 } 5975 5976 /* Start/end handlers for FileDescriptorProto and DescriptorProto (the two 5977 * entities that have names and can contain sub-definitions. */ 5978 void upb_descreader_startcontainer(upb_descreader *r) { 5979 upb_descreader_frame *f = &r->stack[r->stack_len++]; 5980 f->start = r->defs.len; 5981 f->name = NULL; 5982 } 5983 5984 void upb_descreader_endcontainer(upb_descreader *r) { 5985 upb_descreader_frame *f = &r->stack[--r->stack_len]; 5986 upb_deflist_qualify(&r->defs, f->name, f->start); 5987 free(f->name); 5988 f->name = NULL; 5989 } 5990 5991 void upb_descreader_setscopename(upb_descreader *r, char *str) { 5992 upb_descreader_frame *f = &r->stack[r->stack_len-1]; 5993 free(f->name); 5994 f->name = str; 5995 } 5996 5997 /* Handlers for google.protobuf.FileDescriptorProto. 
*/ 5998 static bool file_startmsg(void *r, const void *hd) { 5999 UPB_UNUSED(hd); 6000 upb_descreader_startcontainer(r); 6001 return true; 6002 } 6003 6004 static bool file_endmsg(void *closure, const void *hd, upb_status *status) { 6005 upb_descreader *r = closure; 6006 UPB_UNUSED(hd); 6007 UPB_UNUSED(status); 6008 upb_descreader_endcontainer(r); 6009 return true; 6010 } 6011 6012 static size_t file_onpackage(void *closure, const void *hd, const char *buf, 6013 size_t n, const upb_bufhandle *handle) { 6014 upb_descreader *r = closure; 6015 UPB_UNUSED(hd); 6016 UPB_UNUSED(handle); 6017 /* XXX: see comment at the top of the file. */ 6018 upb_descreader_setscopename(r, upb_strndup(buf, n)); 6019 return n; 6020 } 6021 6022 /* Handlers for google.protobuf.EnumValueDescriptorProto. */ 6023 static bool enumval_startmsg(void *closure, const void *hd) { 6024 upb_descreader *r = closure; 6025 UPB_UNUSED(hd); 6026 r->saw_number = false; 6027 r->saw_name = false; 6028 return true; 6029 } 6030 6031 static size_t enumval_onname(void *closure, const void *hd, const char *buf, 6032 size_t n, const upb_bufhandle *handle) { 6033 upb_descreader *r = closure; 6034 UPB_UNUSED(hd); 6035 UPB_UNUSED(handle); 6036 /* XXX: see comment at the top of the file. 
*/ 6037 free(r->name); 6038 r->name = upb_strndup(buf, n); 6039 r->saw_name = true; 6040 return n; 6041 } 6042 6043 static bool enumval_onnumber(void *closure, const void *hd, int32_t val) { 6044 upb_descreader *r = closure; 6045 UPB_UNUSED(hd); 6046 r->number = val; 6047 r->saw_number = true; 6048 return true; 6049 } 6050 6051 static bool enumval_endmsg(void *closure, const void *hd, upb_status *status) { 6052 upb_descreader *r = closure; 6053 upb_enumdef *e; 6054 UPB_UNUSED(hd); 6055 6056 if(!r->saw_number || !r->saw_name) { 6057 upb_status_seterrmsg(status, "Enum value missing name or number."); 6058 return false; 6059 } 6060 e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); 6061 upb_enumdef_addval(e, r->name, r->number, status); 6062 free(r->name); 6063 r->name = NULL; 6064 return true; 6065 } 6066 6067 6068 /* Handlers for google.protobuf.EnumDescriptorProto. */ 6069 static bool enum_startmsg(void *closure, const void *hd) { 6070 upb_descreader *r = closure; 6071 UPB_UNUSED(hd); 6072 upb_deflist_push(&r->defs, 6073 upb_enumdef_upcast_mutable(upb_enumdef_new(&r->defs))); 6074 return true; 6075 } 6076 6077 static bool enum_endmsg(void *closure, const void *hd, upb_status *status) { 6078 upb_descreader *r = closure; 6079 upb_enumdef *e; 6080 UPB_UNUSED(hd); 6081 6082 e = upb_downcast_enumdef_mutable(upb_descreader_last(r)); 6083 if (upb_def_fullname(upb_descreader_last(r)) == NULL) { 6084 upb_status_seterrmsg(status, "Enum had no name."); 6085 return false; 6086 } 6087 if (upb_enumdef_numvals(e) == 0) { 6088 upb_status_seterrmsg(status, "Enum had no values."); 6089 return false; 6090 } 6091 return true; 6092 } 6093 6094 static size_t enum_onname(void *closure, const void *hd, const char *buf, 6095 size_t n, const upb_bufhandle *handle) { 6096 upb_descreader *r = closure; 6097 char *fullname = upb_strndup(buf, n); 6098 UPB_UNUSED(hd); 6099 UPB_UNUSED(handle); 6100 /* XXX: see comment at the top of the file. 
*/ 6101 upb_def_setfullname(upb_descreader_last(r), fullname, NULL); 6102 free(fullname); 6103 return n; 6104 } 6105 6106 /* Handlers for google.protobuf.FieldDescriptorProto */ 6107 static bool field_startmsg(void *closure, const void *hd) { 6108 upb_descreader *r = closure; 6109 UPB_UNUSED(hd); 6110 r->f = upb_fielddef_new(&r->defs); 6111 free(r->default_string); 6112 r->default_string = NULL; 6113 6114 /* fielddefs default to packed, but descriptors default to non-packed. */ 6115 upb_fielddef_setpacked(r->f, false); 6116 return true; 6117 } 6118 6119 /* Converts the default value in string "str" into "d". Passes a ref on str. 6120 * Returns true on success. */ 6121 static bool parse_default(char *str, upb_fielddef *f) { 6122 bool success = true; 6123 char *end; 6124 switch (upb_fielddef_type(f)) { 6125 case UPB_TYPE_INT32: { 6126 long val = strtol(str, &end, 0); 6127 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || *end) 6128 success = false; 6129 else 6130 upb_fielddef_setdefaultint32(f, val); 6131 break; 6132 } 6133 case UPB_TYPE_INT64: { 6134 /* XXX: Need to write our own strtoll, since it's not available in c89. */ 6135 long long val = strtol(str, &end, 0); 6136 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || *end) 6137 success = false; 6138 else 6139 upb_fielddef_setdefaultint64(f, val); 6140 break; 6141 } 6142 case UPB_TYPE_UINT32: { 6143 unsigned long val = strtoul(str, &end, 0); 6144 if (val > UINT32_MAX || errno == ERANGE || *end) 6145 success = false; 6146 else 6147 upb_fielddef_setdefaultuint32(f, val); 6148 break; 6149 } 6150 case UPB_TYPE_UINT64: { 6151 /* XXX: Need to write our own strtoull, since it's not available in c89. 
*/ 6152 unsigned long long val = strtoul(str, &end, 0); 6153 if (val > UINT64_MAX || errno == ERANGE || *end) 6154 success = false; 6155 else 6156 upb_fielddef_setdefaultuint64(f, val); 6157 break; 6158 } 6159 case UPB_TYPE_DOUBLE: { 6160 double val = strtod(str, &end); 6161 if (errno == ERANGE || *end) 6162 success = false; 6163 else 6164 upb_fielddef_setdefaultdouble(f, val); 6165 break; 6166 } 6167 case UPB_TYPE_FLOAT: { 6168 /* XXX: Need to write our own strtof, since it's not available in c89. */ 6169 float val = strtod(str, &end); 6170 if (errno == ERANGE || *end) 6171 success = false; 6172 else 6173 upb_fielddef_setdefaultfloat(f, val); 6174 break; 6175 } 6176 case UPB_TYPE_BOOL: { 6177 if (strcmp(str, "false") == 0) 6178 upb_fielddef_setdefaultbool(f, false); 6179 else if (strcmp(str, "true") == 0) 6180 upb_fielddef_setdefaultbool(f, true); 6181 else 6182 success = false; 6183 break; 6184 } 6185 default: abort(); 6186 } 6187 return success; 6188 } 6189 6190 static bool field_endmsg(void *closure, const void *hd, upb_status *status) { 6191 upb_descreader *r = closure; 6192 upb_fielddef *f = r->f; 6193 UPB_UNUSED(hd); 6194 6195 /* TODO: verify that all required fields were present. */ 6196 assert(upb_fielddef_number(f) != 0); 6197 assert(upb_fielddef_name(f) != NULL); 6198 assert((upb_fielddef_subdefname(f) != NULL) == upb_fielddef_hassubdef(f)); 6199 6200 if (r->default_string) { 6201 if (upb_fielddef_issubmsg(f)) { 6202 upb_status_seterrmsg(status, "Submessages cannot have defaults."); 6203 return false; 6204 } 6205 if (upb_fielddef_isstring(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM) { 6206 upb_fielddef_setdefaultcstr(f, r->default_string, NULL); 6207 } else { 6208 if (r->default_string && !parse_default(r->default_string, f)) { 6209 /* We don't worry too much about giving a great error message since the 6210 * compiler should have ensured this was correct. 
*/ 6211 upb_status_seterrmsg(status, "Error converting default value."); 6212 return false; 6213 } 6214 } 6215 } 6216 return true; 6217 } 6218 6219 static bool field_onlazy(void *closure, const void *hd, bool val) { 6220 upb_descreader *r = closure; 6221 UPB_UNUSED(hd); 6222 6223 upb_fielddef_setlazy(r->f, val); 6224 return true; 6225 } 6226 6227 static bool field_onpacked(void *closure, const void *hd, bool val) { 6228 upb_descreader *r = closure; 6229 UPB_UNUSED(hd); 6230 6231 upb_fielddef_setpacked(r->f, val); 6232 return true; 6233 } 6234 6235 static bool field_ontype(void *closure, const void *hd, int32_t val) { 6236 upb_descreader *r = closure; 6237 UPB_UNUSED(hd); 6238 6239 upb_fielddef_setdescriptortype(r->f, val); 6240 return true; 6241 } 6242 6243 static bool field_onlabel(void *closure, const void *hd, int32_t val) { 6244 upb_descreader *r = closure; 6245 UPB_UNUSED(hd); 6246 6247 upb_fielddef_setlabel(r->f, val); 6248 return true; 6249 } 6250 6251 static bool field_onnumber(void *closure, const void *hd, int32_t val) { 6252 upb_descreader *r = closure; 6253 bool ok = upb_fielddef_setnumber(r->f, val, NULL); 6254 UPB_UNUSED(hd); 6255 6256 UPB_ASSERT_VAR(ok, ok); 6257 return true; 6258 } 6259 6260 static size_t field_onname(void *closure, const void *hd, const char *buf, 6261 size_t n, const upb_bufhandle *handle) { 6262 upb_descreader *r = closure; 6263 char *name = upb_strndup(buf, n); 6264 UPB_UNUSED(hd); 6265 UPB_UNUSED(handle); 6266 6267 /* XXX: see comment at the top of the file. */ 6268 upb_fielddef_setname(r->f, name, NULL); 6269 free(name); 6270 return n; 6271 } 6272 6273 static size_t field_ontypename(void *closure, const void *hd, const char *buf, 6274 size_t n, const upb_bufhandle *handle) { 6275 upb_descreader *r = closure; 6276 char *name = upb_strndup(buf, n); 6277 UPB_UNUSED(hd); 6278 UPB_UNUSED(handle); 6279 6280 /* XXX: see comment at the top of the file. 
*/ 6281 upb_fielddef_setsubdefname(r->f, name, NULL); 6282 free(name); 6283 return n; 6284 } 6285 6286 static size_t field_onextendee(void *closure, const void *hd, const char *buf, 6287 size_t n, const upb_bufhandle *handle) { 6288 upb_descreader *r = closure; 6289 char *name = upb_strndup(buf, n); 6290 UPB_UNUSED(hd); 6291 UPB_UNUSED(handle); 6292 6293 /* XXX: see comment at the top of the file. */ 6294 upb_fielddef_setcontainingtypename(r->f, name, NULL); 6295 free(name); 6296 return n; 6297 } 6298 6299 static size_t field_ondefaultval(void *closure, const void *hd, const char *buf, 6300 size_t n, const upb_bufhandle *handle) { 6301 upb_descreader *r = closure; 6302 UPB_UNUSED(hd); 6303 UPB_UNUSED(handle); 6304 6305 /* Have to convert from string to the correct type, but we might not know the 6306 * type yet, so we save it as a string until the end of the field. 6307 * XXX: see comment at the top of the file. */ 6308 free(r->default_string); 6309 r->default_string = upb_strndup(buf, n); 6310 return n; 6311 } 6312 6313 /* Handlers for google.protobuf.DescriptorProto (representing a message). 
*/ 6314 static bool msg_startmsg(void *closure, const void *hd) { 6315 upb_descreader *r = closure; 6316 UPB_UNUSED(hd); 6317 6318 upb_deflist_push(&r->defs, 6319 upb_msgdef_upcast_mutable(upb_msgdef_new(&r->defs))); 6320 upb_descreader_startcontainer(r); 6321 return true; 6322 } 6323 6324 static bool msg_endmsg(void *closure, const void *hd, upb_status *status) { 6325 upb_descreader *r = closure; 6326 upb_msgdef *m = upb_descreader_top(r); 6327 UPB_UNUSED(hd); 6328 6329 if(!upb_def_fullname(upb_msgdef_upcast_mutable(m))) { 6330 upb_status_seterrmsg(status, "Encountered message with no name."); 6331 return false; 6332 } 6333 upb_descreader_endcontainer(r); 6334 return true; 6335 } 6336 6337 static size_t msg_onname(void *closure, const void *hd, const char *buf, 6338 size_t n, const upb_bufhandle *handle) { 6339 upb_descreader *r = closure; 6340 upb_msgdef *m = upb_descreader_top(r); 6341 /* XXX: see comment at the top of the file. */ 6342 char *name = upb_strndup(buf, n); 6343 UPB_UNUSED(hd); 6344 UPB_UNUSED(handle); 6345 6346 upb_def_setfullname(upb_msgdef_upcast_mutable(m), name, NULL); 6347 upb_descreader_setscopename(r, name); /* Passes ownership of name. 
*/ 6348 return n; 6349 } 6350 6351 static bool msg_onendfield(void *closure, const void *hd) { 6352 upb_descreader *r = closure; 6353 upb_msgdef *m = upb_descreader_top(r); 6354 UPB_UNUSED(hd); 6355 6356 upb_msgdef_addfield(m, r->f, &r->defs, NULL); 6357 r->f = NULL; 6358 return true; 6359 } 6360 6361 static bool pushextension(void *closure, const void *hd) { 6362 upb_descreader *r = closure; 6363 UPB_UNUSED(hd); 6364 6365 assert(upb_fielddef_containingtypename(r->f)); 6366 upb_fielddef_setisextension(r->f, true); 6367 upb_deflist_push(&r->defs, upb_fielddef_upcast_mutable(r->f)); 6368 r->f = NULL; 6369 return true; 6370 } 6371 6372 #define D(name) upbdefs_google_protobuf_ ## name(s) 6373 6374 static void reghandlers(const void *closure, upb_handlers *h) { 6375 const upb_symtab *s = closure; 6376 const upb_msgdef *m = upb_handlers_msgdef(h); 6377 6378 if (m == D(DescriptorProto)) { 6379 upb_handlers_setstartmsg(h, &msg_startmsg, NULL); 6380 upb_handlers_setendmsg(h, &msg_endmsg, NULL); 6381 upb_handlers_setstring(h, D(DescriptorProto_name), &msg_onname, NULL); 6382 upb_handlers_setendsubmsg(h, D(DescriptorProto_field), &msg_onendfield, 6383 NULL); 6384 upb_handlers_setendsubmsg(h, D(DescriptorProto_extension), &pushextension, 6385 NULL); 6386 } else if (m == D(FileDescriptorProto)) { 6387 upb_handlers_setstartmsg(h, &file_startmsg, NULL); 6388 upb_handlers_setendmsg(h, &file_endmsg, NULL); 6389 upb_handlers_setstring(h, D(FileDescriptorProto_package), &file_onpackage, 6390 NULL); 6391 upb_handlers_setendsubmsg(h, D(FileDescriptorProto_extension), &pushextension, 6392 NULL); 6393 } else if (m == D(EnumValueDescriptorProto)) { 6394 upb_handlers_setstartmsg(h, &enumval_startmsg, NULL); 6395 upb_handlers_setendmsg(h, &enumval_endmsg, NULL); 6396 upb_handlers_setstring(h, D(EnumValueDescriptorProto_name), &enumval_onname, NULL); 6397 upb_handlers_setint32(h, D(EnumValueDescriptorProto_number), &enumval_onnumber, 6398 NULL); 6399 } else if (m == D(EnumDescriptorProto)) { 
6400 upb_handlers_setstartmsg(h, &enum_startmsg, NULL); 6401 upb_handlers_setendmsg(h, &enum_endmsg, NULL); 6402 upb_handlers_setstring(h, D(EnumDescriptorProto_name), &enum_onname, NULL); 6403 } else if (m == D(FieldDescriptorProto)) { 6404 upb_handlers_setstartmsg(h, &field_startmsg, NULL); 6405 upb_handlers_setendmsg(h, &field_endmsg, NULL); 6406 upb_handlers_setint32(h, D(FieldDescriptorProto_type), &field_ontype, 6407 NULL); 6408 upb_handlers_setint32(h, D(FieldDescriptorProto_label), &field_onlabel, 6409 NULL); 6410 upb_handlers_setint32(h, D(FieldDescriptorProto_number), &field_onnumber, 6411 NULL); 6412 upb_handlers_setstring(h, D(FieldDescriptorProto_name), &field_onname, 6413 NULL); 6414 upb_handlers_setstring(h, D(FieldDescriptorProto_type_name), 6415 &field_ontypename, NULL); 6416 upb_handlers_setstring(h, D(FieldDescriptorProto_extendee), 6417 &field_onextendee, NULL); 6418 upb_handlers_setstring(h, D(FieldDescriptorProto_default_value), 6419 &field_ondefaultval, NULL); 6420 } else if (m == D(FieldOptions)) { 6421 upb_handlers_setbool(h, D(FieldOptions_lazy), &field_onlazy, NULL); 6422 upb_handlers_setbool(h, D(FieldOptions_packed), &field_onpacked, NULL); 6423 } 6424 } 6425 6426 #undef D 6427 6428 void descreader_cleanup(void *_r) { 6429 upb_descreader *r = _r; 6430 free(r->name); 6431 upb_deflist_uninit(&r->defs); 6432 free(r->default_string); 6433 while (r->stack_len > 0) { 6434 upb_descreader_frame *f = &r->stack[--r->stack_len]; 6435 free(f->name); 6436 } 6437 } 6438 6439 6440 /* Public API ****************************************************************/ 6441 6442 upb_descreader *upb_descreader_create(upb_env *e, const upb_handlers *h) { 6443 upb_descreader *r = upb_env_malloc(e, sizeof(upb_descreader)); 6444 if (!r || !upb_env_addcleanup(e, descreader_cleanup, r)) { 6445 return NULL; 6446 } 6447 6448 upb_deflist_init(&r->defs); 6449 upb_sink_reset(upb_descreader_input(r), h, r); 6450 r->stack_len = 0; 6451 r->name = NULL; 6452 r->default_string 
= NULL; 6453 6454 return r; 6455 } 6456 6457 upb_def **upb_descreader_getdefs(upb_descreader *r, void *owner, int *n) { 6458 *n = r->defs.len; 6459 upb_deflist_donaterefs(&r->defs, owner); 6460 return r->defs.defs; 6461 } 6462 6463 upb_sink *upb_descreader_input(upb_descreader *r) { 6464 return &r->sink; 6465 } 6466 6467 const upb_handlers *upb_descreader_newhandlers(const void *owner) { 6468 const upb_symtab *s = upbdefs_google_protobuf_descriptor(&s); 6469 const upb_handlers *h = upb_handlers_newfrozen( 6470 upbdefs_google_protobuf_FileDescriptorSet(s), owner, reghandlers, s); 6471 upb_symtab_unref(s, &s); 6472 return h; 6473 } 6474 /* 6475 ** protobuf decoder bytecode compiler 6476 ** 6477 ** Code to compile a upb::Handlers into bytecode for decoding a protobuf 6478 ** according to that specific schema and destination handlers. 6479 ** 6480 ** Compiling to bytecode is always the first step. If we are using the 6481 ** interpreted decoder we leave it as bytecode and interpret that. If we are 6482 ** using a JIT decoder we use a code generator to turn the bytecode into native 6483 ** code, LLVM IR, etc. 6484 ** 6485 ** Bytecode definition is in decoder.int.h. 
6486 */ 6487 6488 #include <stdarg.h> 6489 6490 #ifdef UPB_DUMP_BYTECODE 6491 #include <stdio.h> 6492 #endif 6493 6494 #define MAXLABEL 5 6495 #define EMPTYLABEL -1 6496 6497 /* mgroup *********************************************************************/ 6498 6499 static void freegroup(upb_refcounted *r) { 6500 mgroup *g = (mgroup*)r; 6501 upb_inttable_uninit(&g->methods); 6502 #ifdef UPB_USE_JIT_X64 6503 upb_pbdecoder_freejit(g); 6504 #endif 6505 free(g->bytecode); 6506 free(g); 6507 } 6508 6509 static void visitgroup(const upb_refcounted *r, upb_refcounted_visit *visit, 6510 void *closure) { 6511 const mgroup *g = (const mgroup*)r; 6512 upb_inttable_iter i; 6513 upb_inttable_begin(&i, &g->methods); 6514 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 6515 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); 6516 visit(r, upb_pbdecodermethod_upcast(method), closure); 6517 } 6518 } 6519 6520 mgroup *newgroup(const void *owner) { 6521 mgroup *g = malloc(sizeof(*g)); 6522 static const struct upb_refcounted_vtbl vtbl = {visitgroup, freegroup}; 6523 upb_refcounted_init(mgroup_upcast_mutable(g), &vtbl, owner); 6524 upb_inttable_init(&g->methods, UPB_CTYPE_PTR); 6525 g->bytecode = NULL; 6526 g->bytecode_end = NULL; 6527 return g; 6528 } 6529 6530 6531 /* upb_pbdecodermethod ********************************************************/ 6532 6533 static void freemethod(upb_refcounted *r) { 6534 upb_pbdecodermethod *method = (upb_pbdecodermethod*)r; 6535 6536 if (method->dest_handlers_) { 6537 upb_handlers_unref(method->dest_handlers_, method); 6538 } 6539 6540 upb_inttable_uninit(&method->dispatch); 6541 free(method); 6542 } 6543 6544 static void visitmethod(const upb_refcounted *r, upb_refcounted_visit *visit, 6545 void *closure) { 6546 const upb_pbdecodermethod *m = (const upb_pbdecodermethod*)r; 6547 visit(r, m->group, closure); 6548 } 6549 6550 static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers, 6551 mgroup *group) { 
6552 static const struct upb_refcounted_vtbl vtbl = {visitmethod, freemethod}; 6553 upb_pbdecodermethod *ret = malloc(sizeof(*ret)); 6554 upb_refcounted_init(upb_pbdecodermethod_upcast_mutable(ret), &vtbl, &ret); 6555 upb_byteshandler_init(&ret->input_handler_); 6556 6557 /* The method references the group and vice-versa, in a circular reference. */ 6558 upb_ref2(ret, group); 6559 upb_ref2(group, ret); 6560 upb_inttable_insertptr(&group->methods, dest_handlers, upb_value_ptr(ret)); 6561 upb_pbdecodermethod_unref(ret, &ret); 6562 6563 ret->group = mgroup_upcast_mutable(group); 6564 ret->dest_handlers_ = dest_handlers; 6565 ret->is_native_ = false; /* If we JIT, it will update this later. */ 6566 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); 6567 6568 if (ret->dest_handlers_) { 6569 upb_handlers_ref(ret->dest_handlers_, ret); 6570 } 6571 return ret; 6572 } 6573 6574 const upb_handlers *upb_pbdecodermethod_desthandlers( 6575 const upb_pbdecodermethod *m) { 6576 return m->dest_handlers_; 6577 } 6578 6579 const upb_byteshandler *upb_pbdecodermethod_inputhandler( 6580 const upb_pbdecodermethod *m) { 6581 return &m->input_handler_; 6582 } 6583 6584 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { 6585 return m->is_native_; 6586 } 6587 6588 const upb_pbdecodermethod *upb_pbdecodermethod_new( 6589 const upb_pbdecodermethodopts *opts, const void *owner) { 6590 const upb_pbdecodermethod *ret; 6591 upb_pbcodecache cache; 6592 6593 upb_pbcodecache_init(&cache); 6594 ret = upb_pbcodecache_getdecodermethod(&cache, opts); 6595 upb_pbdecodermethod_ref(ret, owner); 6596 upb_pbcodecache_uninit(&cache); 6597 return ret; 6598 } 6599 6600 6601 /* bytecode compiler **********************************************************/ 6602 6603 /* Data used only at compilation time. 
*/ 6604 typedef struct { 6605 mgroup *group; 6606 6607 uint32_t *pc; 6608 int fwd_labels[MAXLABEL]; 6609 int back_labels[MAXLABEL]; 6610 6611 /* For fields marked "lazy", parse them lazily or eagerly? */ 6612 bool lazy; 6613 } compiler; 6614 6615 static compiler *newcompiler(mgroup *group, bool lazy) { 6616 compiler *ret = malloc(sizeof(*ret)); 6617 int i; 6618 6619 ret->group = group; 6620 ret->lazy = lazy; 6621 for (i = 0; i < MAXLABEL; i++) { 6622 ret->fwd_labels[i] = EMPTYLABEL; 6623 ret->back_labels[i] = EMPTYLABEL; 6624 } 6625 return ret; 6626 } 6627 6628 static void freecompiler(compiler *c) { 6629 free(c); 6630 } 6631 6632 const size_t ptr_words = sizeof(void*) / sizeof(uint32_t); 6633 6634 /* How many words an instruction is. */ 6635 static int instruction_len(uint32_t instr) { 6636 switch (getop(instr)) { 6637 case OP_SETDISPATCH: return 1 + ptr_words; 6638 case OP_TAGN: return 3; 6639 case OP_SETBIGGROUPNUM: return 2; 6640 default: return 1; 6641 } 6642 } 6643 6644 bool op_has_longofs(int32_t instruction) { 6645 switch (getop(instruction)) { 6646 case OP_CALL: 6647 case OP_BRANCH: 6648 case OP_CHECKDELIM: 6649 return true; 6650 /* The "tag" instructions only have 8 bytes available for the jump target, 6651 * but that is ok because these opcodes only require short jumps. */ 6652 case OP_TAG1: 6653 case OP_TAG2: 6654 case OP_TAGN: 6655 return false; 6656 default: 6657 assert(false); 6658 return false; 6659 } 6660 } 6661 6662 static int32_t getofs(uint32_t instruction) { 6663 if (op_has_longofs(instruction)) { 6664 return (int32_t)instruction >> 8; 6665 } else { 6666 return (int8_t)(instruction >> 8); 6667 } 6668 } 6669 6670 static void setofs(uint32_t *instruction, int32_t ofs) { 6671 if (op_has_longofs(*instruction)) { 6672 *instruction = getop(*instruction) | ofs << 8; 6673 } else { 6674 *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8); 6675 } 6676 assert(getofs(*instruction) == ofs); /* Would fail in cases of overflow. 
*/ 6677 } 6678 6679 static uint32_t pcofs(compiler *c) { return c->pc - c->group->bytecode; } 6680 6681 /* Defines a local label at the current PC location. All previous forward 6682 * references are updated to point to this location. The location is noted 6683 * for any future backward references. */ 6684 static void label(compiler *c, unsigned int label) { 6685 int val; 6686 uint32_t *codep; 6687 6688 assert(label < MAXLABEL); 6689 val = c->fwd_labels[label]; 6690 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val; 6691 while (codep) { 6692 int ofs = getofs(*codep); 6693 setofs(codep, c->pc - codep - instruction_len(*codep)); 6694 codep = ofs ? codep + ofs : NULL; 6695 } 6696 c->fwd_labels[label] = EMPTYLABEL; 6697 c->back_labels[label] = pcofs(c); 6698 } 6699 6700 /* Creates a reference to a numbered label; either a forward reference 6701 * (positive arg) or backward reference (negative arg). For forward references 6702 * the value returned now is actually a "next" pointer into a linked list of all 6703 * instructions that use this label and will be patched later when the label is 6704 * defined with label(). 6705 * 6706 * The returned value is the offset that should be written into the instruction. 6707 */ 6708 static int32_t labelref(compiler *c, int label) { 6709 assert(label < MAXLABEL); 6710 if (label == LABEL_DISPATCH) { 6711 /* No resolving required. */ 6712 return 0; 6713 } else if (label < 0) { 6714 /* Backward local label. Relative to the next instruction. */ 6715 uint32_t from = (c->pc + 1) - c->group->bytecode; 6716 return c->back_labels[-label] - from; 6717 } else { 6718 /* Forward local label: prepend to (possibly-empty) linked list. */ 6719 int *lptr = &c->fwd_labels[label]; 6720 int32_t ret = (*lptr == EMPTYLABEL) ? 
0 : *lptr - pcofs(c); 6721 *lptr = pcofs(c); 6722 return ret; 6723 } 6724 } 6725 6726 static void put32(compiler *c, uint32_t v) { 6727 mgroup *g = c->group; 6728 if (c->pc == g->bytecode_end) { 6729 int ofs = pcofs(c); 6730 size_t oldsize = g->bytecode_end - g->bytecode; 6731 size_t newsize = UPB_MAX(oldsize * 2, 64); 6732 /* TODO(haberman): handle OOM. */ 6733 g->bytecode = realloc(g->bytecode, newsize * sizeof(uint32_t)); 6734 g->bytecode_end = g->bytecode + newsize; 6735 c->pc = g->bytecode + ofs; 6736 } 6737 *c->pc++ = v; 6738 } 6739 6740 static void putop(compiler *c, opcode op, ...) { 6741 va_list ap; 6742 va_start(ap, op); 6743 6744 switch (op) { 6745 case OP_SETDISPATCH: { 6746 uintptr_t ptr = (uintptr_t)va_arg(ap, void*); 6747 put32(c, OP_SETDISPATCH); 6748 put32(c, ptr); 6749 if (sizeof(uintptr_t) > sizeof(uint32_t)) 6750 put32(c, (uint64_t)ptr >> 32); 6751 break; 6752 } 6753 case OP_STARTMSG: 6754 case OP_ENDMSG: 6755 case OP_PUSHLENDELIM: 6756 case OP_POP: 6757 case OP_SETDELIM: 6758 case OP_HALT: 6759 case OP_RET: 6760 case OP_DISPATCH: 6761 put32(c, op); 6762 break; 6763 case OP_PARSE_DOUBLE: 6764 case OP_PARSE_FLOAT: 6765 case OP_PARSE_INT64: 6766 case OP_PARSE_UINT64: 6767 case OP_PARSE_INT32: 6768 case OP_PARSE_FIXED64: 6769 case OP_PARSE_FIXED32: 6770 case OP_PARSE_BOOL: 6771 case OP_PARSE_UINT32: 6772 case OP_PARSE_SFIXED32: 6773 case OP_PARSE_SFIXED64: 6774 case OP_PARSE_SINT32: 6775 case OP_PARSE_SINT64: 6776 case OP_STARTSEQ: 6777 case OP_ENDSEQ: 6778 case OP_STARTSUBMSG: 6779 case OP_ENDSUBMSG: 6780 case OP_STARTSTR: 6781 case OP_STRING: 6782 case OP_ENDSTR: 6783 case OP_PUSHTAGDELIM: 6784 put32(c, op | va_arg(ap, upb_selector_t) << 8); 6785 break; 6786 case OP_SETBIGGROUPNUM: 6787 put32(c, op); 6788 put32(c, va_arg(ap, int)); 6789 break; 6790 case OP_CALL: { 6791 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *); 6792 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); 6793 break; 6794 } 6795 case 
OP_CHECKDELIM: 6796 case OP_BRANCH: { 6797 uint32_t instruction = op; 6798 int label = va_arg(ap, int); 6799 setofs(&instruction, labelref(c, label)); 6800 put32(c, instruction); 6801 break; 6802 } 6803 case OP_TAG1: 6804 case OP_TAG2: { 6805 int label = va_arg(ap, int); 6806 uint64_t tag = va_arg(ap, uint64_t); 6807 uint32_t instruction = op | (tag << 16); 6808 assert(tag <= 0xffff); 6809 setofs(&instruction, labelref(c, label)); 6810 put32(c, instruction); 6811 break; 6812 } 6813 case OP_TAGN: { 6814 int label = va_arg(ap, int); 6815 uint64_t tag = va_arg(ap, uint64_t); 6816 uint32_t instruction = op | (upb_value_size(tag) << 16); 6817 setofs(&instruction, labelref(c, label)); 6818 put32(c, instruction); 6819 put32(c, tag); 6820 put32(c, tag >> 32); 6821 break; 6822 } 6823 } 6824 6825 va_end(ap); 6826 } 6827 6828 #if defined(UPB_USE_JIT_X64) || defined(UPB_DUMP_BYTECODE) 6829 6830 const char *upb_pbdecoder_getopname(unsigned int op) { 6831 #define QUOTE(x) #x 6832 #define EXPAND_AND_QUOTE(x) QUOTE(x) 6833 #define OPNAME(x) OP_##x 6834 #define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x)); 6835 #define T(x) OP(PARSE_##x) 6836 /* Keep in sync with list in decoder.int.h. 
*/ 6837 switch ((opcode)op) { 6838 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32) 6839 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64) 6840 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG) 6841 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET) 6842 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM) 6843 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP) 6844 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT) 6845 } 6846 return "<unknown op>"; 6847 #undef OP 6848 #undef T 6849 } 6850 6851 #endif 6852 6853 #ifdef UPB_DUMP_BYTECODE 6854 6855 static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { 6856 6857 uint32_t *begin = p; 6858 6859 while (p < end) { 6860 fprintf(f, "%p %8tx", p, p - begin); 6861 uint32_t instr = *p++; 6862 uint8_t op = getop(instr); 6863 fprintf(f, " %s", upb_pbdecoder_getopname(op)); 6864 switch ((opcode)op) { 6865 case OP_SETDISPATCH: { 6866 const upb_inttable *dispatch; 6867 memcpy(&dispatch, p, sizeof(void*)); 6868 p += ptr_words; 6869 const upb_pbdecodermethod *method = 6870 (void *)((char *)dispatch - 6871 offsetof(upb_pbdecodermethod, dispatch)); 6872 fprintf(f, " %s", upb_msgdef_fullname( 6873 upb_handlers_msgdef(method->dest_handlers_))); 6874 break; 6875 } 6876 case OP_DISPATCH: 6877 case OP_STARTMSG: 6878 case OP_ENDMSG: 6879 case OP_PUSHLENDELIM: 6880 case OP_POP: 6881 case OP_SETDELIM: 6882 case OP_HALT: 6883 case OP_RET: 6884 break; 6885 case OP_PARSE_DOUBLE: 6886 case OP_PARSE_FLOAT: 6887 case OP_PARSE_INT64: 6888 case OP_PARSE_UINT64: 6889 case OP_PARSE_INT32: 6890 case OP_PARSE_FIXED64: 6891 case OP_PARSE_FIXED32: 6892 case OP_PARSE_BOOL: 6893 case OP_PARSE_UINT32: 6894 case OP_PARSE_SFIXED32: 6895 case OP_PARSE_SFIXED64: 6896 case OP_PARSE_SINT32: 6897 case OP_PARSE_SINT64: 6898 case OP_STARTSEQ: 6899 case OP_ENDSEQ: 6900 case OP_STARTSUBMSG: 6901 case OP_ENDSUBMSG: 6902 case OP_STARTSTR: 6903 case OP_STRING: 6904 case OP_ENDSTR: 6905 case 
OP_PUSHTAGDELIM: 6906 fprintf(f, " %d", instr >> 8); 6907 break; 6908 case OP_SETBIGGROUPNUM: 6909 fprintf(f, " %d", *p++); 6910 break; 6911 case OP_CHECKDELIM: 6912 case OP_CALL: 6913 case OP_BRANCH: 6914 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6915 break; 6916 case OP_TAG1: 6917 case OP_TAG2: { 6918 fprintf(f, " tag:0x%x", instr >> 16); 6919 if (getofs(instr)) { 6920 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6921 } 6922 break; 6923 } 6924 case OP_TAGN: { 6925 uint64_t tag = *p++; 6926 tag |= (uint64_t)*p++ << 32; 6927 fprintf(f, " tag:0x%llx", (long long)tag); 6928 fprintf(f, " n:%d", instr >> 16); 6929 if (getofs(instr)) { 6930 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6931 } 6932 break; 6933 } 6934 } 6935 fputs("\n", f); 6936 } 6937 } 6938 6939 #endif 6940 6941 static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) { 6942 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type; 6943 uint64_t encoded_tag = upb_vencode32(tag); 6944 /* No tag should be greater than 5 bytes. */ 6945 assert(encoded_tag <= 0xffffffffff); 6946 return encoded_tag; 6947 } 6948 6949 static void putchecktag(compiler *c, const upb_fielddef *f, 6950 int wire_type, int dest) { 6951 uint64_t tag = get_encoded_tag(f, wire_type); 6952 switch (upb_value_size(tag)) { 6953 case 1: 6954 putop(c, OP_TAG1, dest, tag); 6955 break; 6956 case 2: 6957 putop(c, OP_TAG2, dest, tag); 6958 break; 6959 default: 6960 putop(c, OP_TAGN, dest, tag); 6961 break; 6962 } 6963 } 6964 6965 static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { 6966 upb_selector_t selector; 6967 bool ok = upb_handlers_getselector(f, type, &selector); 6968 UPB_ASSERT_VAR(ok, ok); 6969 return selector; 6970 } 6971 6972 /* Takes an existing, primary dispatch table entry and repacks it with a 6973 * different alternate wire type. Called when we are inserting a secondary 6974 * dispatch table entry for an alternate wire type. 
*/ 6975 static uint64_t repack(uint64_t dispatch, int new_wt2) { 6976 uint64_t ofs; 6977 uint8_t wt1; 6978 uint8_t old_wt2; 6979 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2); 6980 assert(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */ 6981 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2); 6982 } 6983 6984 /* Marks the current bytecode position as the dispatch target for this message, 6985 * field, and wire type. */ 6986 static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, 6987 const upb_fielddef *f, int wire_type) { 6988 /* Offset is relative to msg base. */ 6989 uint64_t ofs = pcofs(c) - method->code_base.ofs; 6990 uint32_t fn = upb_fielddef_number(f); 6991 upb_inttable *d = &method->dispatch; 6992 upb_value v; 6993 if (upb_inttable_remove(d, fn, &v)) { 6994 /* TODO: prioritize based on packed setting in .proto file. */ 6995 uint64_t repacked = repack(upb_value_getuint64(v), wire_type); 6996 upb_inttable_insert(d, fn, upb_value_uint64(repacked)); 6997 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs)); 6998 } else { 6999 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE); 7000 upb_inttable_insert(d, fn, upb_value_uint64(val)); 7001 } 7002 } 7003 7004 static void putpush(compiler *c, const upb_fielddef *f) { 7005 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { 7006 putop(c, OP_PUSHLENDELIM); 7007 } else { 7008 uint32_t fn = upb_fielddef_number(f); 7009 if (fn >= 1 << 24) { 7010 putop(c, OP_PUSHTAGDELIM, 0); 7011 putop(c, OP_SETBIGGROUPNUM, fn); 7012 } else { 7013 putop(c, OP_PUSHTAGDELIM, fn); 7014 } 7015 } 7016 } 7017 7018 static upb_pbdecodermethod *find_submethod(const compiler *c, 7019 const upb_pbdecodermethod *method, 7020 const upb_fielddef *f) { 7021 const upb_handlers *sub = 7022 upb_handlers_getsubhandlers(method->dest_handlers_, f); 7023 upb_value v; 7024 return upb_inttable_lookupptr(&c->group->methods, sub, &v) 7025 ? 
upb_value_getptr(v) 7026 : NULL; 7027 } 7028 7029 static void putsel(compiler *c, opcode op, upb_selector_t sel, 7030 const upb_handlers *h) { 7031 if (upb_handlers_gethandler(h, sel)) { 7032 putop(c, op, sel); 7033 } 7034 } 7035 7036 /* Puts an opcode to call a callback, but only if a callback actually exists for 7037 * this field and handler type. */ 7038 static void maybeput(compiler *c, opcode op, const upb_handlers *h, 7039 const upb_fielddef *f, upb_handlertype_t type) { 7040 putsel(c, op, getsel(f, type), h); 7041 } 7042 7043 static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { 7044 if (!upb_fielddef_lazy(f)) 7045 return false; 7046 7047 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR)) || 7048 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING)) || 7049 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR)); 7050 } 7051 7052 7053 /* bytecode compiler code generation ******************************************/ 7054 7055 /* Symbolic names for our local labels. */ 7056 #define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */ 7057 #define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */ 7058 #define LABEL_FIELD 3 /* Jump backward to find the most recent field. */ 7059 #define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */ 7060 7061 /* Generates bytecode to parse a single non-lazy message field. */ 7062 static void generate_msgfield(compiler *c, const upb_fielddef *f, 7063 upb_pbdecodermethod *method) { 7064 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); 7065 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); 7066 int wire_type; 7067 7068 if (!sub_m) { 7069 /* Don't emit any code for this field at all; it will be parsed as an 7070 * unknown field. */ 7071 return; 7072 } 7073 7074 label(c, LABEL_FIELD); 7075 7076 wire_type = 7077 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) 7078 ? 
UPB_WIRE_TYPE_DELIMITED 7079 : UPB_WIRE_TYPE_START_GROUP; 7080 7081 if (upb_fielddef_isseq(f)) { 7082 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 7083 putchecktag(c, f, wire_type, LABEL_DISPATCH); 7084 dispatchtarget(c, method, f, wire_type); 7085 putop(c, OP_PUSHTAGDELIM, 0); 7086 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); 7087 label(c, LABEL_LOOPSTART); 7088 putpush(c, f); 7089 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); 7090 putop(c, OP_CALL, sub_m); 7091 putop(c, OP_POP); 7092 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); 7093 if (wire_type == UPB_WIRE_TYPE_DELIMITED) { 7094 putop(c, OP_SETDELIM); 7095 } 7096 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 7097 putchecktag(c, f, wire_type, LABEL_LOOPBREAK); 7098 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 7099 label(c, LABEL_LOOPBREAK); 7100 putop(c, OP_POP); 7101 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); 7102 } else { 7103 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 7104 putchecktag(c, f, wire_type, LABEL_DISPATCH); 7105 dispatchtarget(c, method, f, wire_type); 7106 putpush(c, f); 7107 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); 7108 putop(c, OP_CALL, sub_m); 7109 putop(c, OP_POP); 7110 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); 7111 if (wire_type == UPB_WIRE_TYPE_DELIMITED) { 7112 putop(c, OP_SETDELIM); 7113 } 7114 } 7115 } 7116 7117 /* Generates bytecode to parse a single string or lazy submessage field. 
*/
/* The field is always matched against the DELIMITED wire type.  Repeated and
 * non-repeated fields share the same prologue and per-string body; the
 * repeated form wraps the body in a STARTSEQ ... ENDSEQ loop. */
static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  bool repeated;

  label(c, LABEL_FIELD);
  repeated = upb_fielddef_isseq(f);

  /* Prologue common to both forms. */
  putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
  putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
  dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);

  if (repeated) {
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    label(c, LABEL_LOOPSTART);
  }

  /* Parse one length-delimited value. */
  putop(c, OP_PUSHLENDELIM);
  putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
  /* Need to emit even if no handler to skip past the string. */
  putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
  putop(c, OP_POP);
  maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
  putop(c, OP_SETDELIM);

  if (repeated) {
    /* Keep looping while the next tag is the same field; then close the
     * sequence. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  }
}

/* Generates bytecode to parse a single primitive field.
*/ 7157 static void generate_primitivefield(compiler *c, const upb_fielddef *f, 7158 upb_pbdecodermethod *method) { 7159 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); 7160 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); 7161 opcode parse_type; 7162 upb_selector_t sel; 7163 int wire_type; 7164 7165 label(c, LABEL_FIELD); 7166 7167 /* From a decoding perspective, ENUM is the same as INT32. */ 7168 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) 7169 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; 7170 7171 parse_type = (opcode)descriptor_type; 7172 7173 /* TODO(haberman): generate packed or non-packed first depending on "packed" 7174 * setting in the fielddef. This will favor (in speed) whichever was 7175 * specified. */ 7176 7177 assert((int)parse_type >= 0 && parse_type <= OP_MAX); 7178 sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); 7179 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; 7180 if (upb_fielddef_isseq(f)) { 7181 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 7182 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); 7183 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); 7184 putop(c, OP_PUSHLENDELIM); 7185 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */ 7186 label(c, LABEL_LOOPSTART); 7187 putop(c, parse_type, sel); 7188 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 7189 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 7190 dispatchtarget(c, method, f, wire_type); 7191 putop(c, OP_PUSHTAGDELIM, 0); 7192 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */ 7193 label(c, LABEL_LOOPSTART); 7194 putop(c, parse_type, sel); 7195 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 7196 putchecktag(c, f, wire_type, LABEL_LOOPBREAK); 7197 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 7198 label(c, LABEL_LOOPBREAK); 7199 putop(c, OP_POP); /* Packed and non-packed join. 
*/ 7200 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); 7201 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */ 7202 } else { 7203 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 7204 putchecktag(c, f, wire_type, LABEL_DISPATCH); 7205 dispatchtarget(c, method, f, wire_type); 7206 putop(c, parse_type, sel); 7207 } 7208 } 7209 7210 /* Adds bytecode for parsing the given message to the given decoderplan, 7211 * while adding all dispatch targets to this message's dispatch table. */ 7212 static void compile_method(compiler *c, upb_pbdecodermethod *method) { 7213 const upb_handlers *h; 7214 const upb_msgdef *md; 7215 uint32_t* start_pc; 7216 upb_msg_field_iter i; 7217 upb_value val; 7218 7219 assert(method); 7220 7221 /* Clear all entries in the dispatch table. */ 7222 upb_inttable_uninit(&method->dispatch); 7223 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); 7224 7225 h = upb_pbdecodermethod_desthandlers(method); 7226 md = upb_handlers_msgdef(h); 7227 7228 method->code_base.ofs = pcofs(c); 7229 putop(c, OP_SETDISPATCH, &method->dispatch); 7230 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); 7231 label(c, LABEL_FIELD); 7232 start_pc = c->pc; 7233 for(upb_msg_field_begin(&i, md); 7234 !upb_msg_field_done(&i); 7235 upb_msg_field_next(&i)) { 7236 const upb_fielddef *f = upb_msg_iter_field(&i); 7237 upb_fieldtype_t type = upb_fielddef_type(f); 7238 7239 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { 7240 generate_msgfield(c, f, method); 7241 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || 7242 type == UPB_TYPE_MESSAGE) { 7243 generate_delimfield(c, f, method); 7244 } else { 7245 generate_primitivefield(c, f, method); 7246 } 7247 } 7248 7249 /* If there were no fields, or if no handlers were defined, we need to 7250 * generate a non-empty loop body so that we can at least dispatch for unknown 7251 * fields and check for the end of the message. */ 7252 if (c->pc == start_pc) { 7253 /* Check for end-of-message. 
*/ 7254 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 7255 /* Unconditionally dispatch. */ 7256 putop(c, OP_DISPATCH, 0); 7257 } 7258 7259 /* For now we just loop back to the last field of the message (or if none, 7260 * the DISPATCH opcode for the message). */ 7261 putop(c, OP_BRANCH, -LABEL_FIELD); 7262 7263 /* Insert both a label and a dispatch table entry for this end-of-msg. */ 7264 label(c, LABEL_ENDMSG); 7265 val = upb_value_uint64(pcofs(c) - method->code_base.ofs); 7266 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val); 7267 7268 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h); 7269 putop(c, OP_RET); 7270 7271 upb_inttable_compact(&method->dispatch); 7272 } 7273 7274 /* Populate "methods" with new upb_pbdecodermethod objects reachable from "h". 7275 * Returns the method for these handlers. 7276 * 7277 * Generates a new method for every destination handlers reachable from "h". */ 7278 static void find_methods(compiler *c, const upb_handlers *h) { 7279 upb_value v; 7280 upb_msg_field_iter i; 7281 const upb_msgdef *md; 7282 7283 if (upb_inttable_lookupptr(&c->group->methods, h, &v)) 7284 return; 7285 newmethod(h, c->group); 7286 7287 /* Find submethods. */ 7288 md = upb_handlers_msgdef(h); 7289 for(upb_msg_field_begin(&i, md); 7290 !upb_msg_field_done(&i); 7291 upb_msg_field_next(&i)) { 7292 const upb_fielddef *f = upb_msg_iter_field(&i); 7293 const upb_handlers *sub_h; 7294 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && 7295 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) { 7296 /* We only generate a decoder method for submessages with handlers. 7297 * Others will be parsed as unknown fields. */ 7298 find_methods(c, sub_h); 7299 } 7300 } 7301 } 7302 7303 /* (Re-)compile bytecode for all messages in "msgs." 7304 * Overwrites any existing bytecode in "c". */ 7305 static void compile_methods(compiler *c) { 7306 upb_inttable_iter i; 7307 7308 /* Start over at the beginning of the bytecode. 
*/ 7309 c->pc = c->group->bytecode; 7310 7311 upb_inttable_begin(&i, &c->group->methods); 7312 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 7313 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); 7314 compile_method(c, method); 7315 } 7316 } 7317 7318 static void set_bytecode_handlers(mgroup *g) { 7319 upb_inttable_iter i; 7320 upb_inttable_begin(&i, &g->methods); 7321 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 7322 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i)); 7323 upb_byteshandler *h = &m->input_handler_; 7324 7325 m->code_base.ptr = g->bytecode + m->code_base.ofs; 7326 7327 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr); 7328 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g); 7329 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m); 7330 } 7331 } 7332 7333 7334 /* JIT setup. *****************************************************************/ 7335 7336 #ifdef UPB_USE_JIT_X64 7337 7338 static void sethandlers(mgroup *g, bool allowjit) { 7339 g->jit_code = NULL; 7340 if (allowjit) { 7341 /* Compile byte-code into machine code, create handlers. */ 7342 upb_pbdecoder_jit(g); 7343 } else { 7344 set_bytecode_handlers(g); 7345 } 7346 } 7347 7348 #else /* UPB_USE_JIT_X64 */ 7349 7350 static void sethandlers(mgroup *g, bool allowjit) { 7351 /* No JIT compiled in; use bytecode handlers unconditionally. */ 7352 UPB_UNUSED(allowjit); 7353 set_bytecode_handlers(g); 7354 } 7355 7356 #endif /* UPB_USE_JIT_X64 */ 7357 7358 7359 /* TODO(haberman): allow this to be constructed for an arbitrary set of dest 7360 * handlers and other mgroups (but verify we have a transitive closure). 
*/ 7361 const mgroup *mgroup_new(const upb_handlers *dest, bool allowjit, bool lazy, 7362 const void *owner) { 7363 mgroup *g; 7364 compiler *c; 7365 7366 UPB_UNUSED(allowjit); 7367 assert(upb_handlers_isfrozen(dest)); 7368 7369 g = newgroup(owner); 7370 c = newcompiler(g, lazy); 7371 find_methods(c, dest); 7372 7373 /* We compile in two passes: 7374 * 1. all messages are assigned relative offsets from the beginning of the 7375 * bytecode (saved in method->code_base). 7376 * 2. forwards OP_CALL instructions can be correctly linked since message 7377 * offsets have been previously assigned. 7378 * 7379 * Could avoid the second pass by linking OP_CALL instructions somehow. */ 7380 compile_methods(c); 7381 compile_methods(c); 7382 g->bytecode_end = c->pc; 7383 freecompiler(c); 7384 7385 #ifdef UPB_DUMP_BYTECODE 7386 { 7387 FILE *f = fopen("/tmp/upb-bytecode", "wb"); 7388 assert(f); 7389 dumpbc(g->bytecode, g->bytecode_end, stderr); 7390 dumpbc(g->bytecode, g->bytecode_end, f); 7391 fclose(f); 7392 } 7393 #endif 7394 7395 sethandlers(g, allowjit); 7396 return g; 7397 } 7398 7399 7400 /* upb_pbcodecache ************************************************************/ 7401 7402 void upb_pbcodecache_init(upb_pbcodecache *c) { 7403 upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR); 7404 c->allow_jit_ = true; 7405 } 7406 7407 void upb_pbcodecache_uninit(upb_pbcodecache *c) { 7408 upb_inttable_iter i; 7409 upb_inttable_begin(&i, &c->groups); 7410 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 7411 const mgroup *group = upb_value_getconstptr(upb_inttable_iter_value(&i)); 7412 mgroup_unref(group, c); 7413 } 7414 upb_inttable_uninit(&c->groups); 7415 } 7416 7417 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c) { 7418 return c->allow_jit_; 7419 } 7420 7421 bool upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow) { 7422 if (upb_inttable_count(&c->groups) > 0) 7423 return false; 7424 c->allow_jit_ = allow; 7425 return true; 7426 } 7427 7428 const 
upb_pbdecodermethod *upb_pbcodecache_getdecodermethod( 7429 upb_pbcodecache *c, const upb_pbdecodermethodopts *opts) { 7430 upb_value v; 7431 bool ok; 7432 7433 /* Right now we build a new DecoderMethod every time. 7434 * TODO(haberman): properly cache methods by their true key. */ 7435 const mgroup *g = mgroup_new(opts->handlers, c->allow_jit_, opts->lazy, c); 7436 upb_inttable_push(&c->groups, upb_value_constptr(g)); 7437 7438 ok = upb_inttable_lookupptr(&g->methods, opts->handlers, &v); 7439 UPB_ASSERT_VAR(ok, ok); 7440 return upb_value_getptr(v); 7441 } 7442 7443 7444 /* upb_pbdecodermethodopts ****************************************************/ 7445 7446 void upb_pbdecodermethodopts_init(upb_pbdecodermethodopts *opts, 7447 const upb_handlers *h) { 7448 opts->handlers = h; 7449 opts->lazy = false; 7450 } 7451 7452 void upb_pbdecodermethodopts_setlazy(upb_pbdecodermethodopts *opts, bool lazy) { 7453 opts->lazy = lazy; 7454 } 7455 /* 7456 ** upb::Decoder (Bytecode Decoder VM) 7457 ** 7458 ** Bytecode must previously have been generated using the bytecode compiler in 7459 ** compile_decoder.c. This decoder then walks through the bytecode op-by-op to 7460 ** parse the input. 7461 ** 7462 ** Decoding is fully resumable; we just keep a pointer to the current bytecode 7463 ** instruction and resume from there. A fair amount of the logic here is to 7464 ** handle the fact that values can span buffer seams and we have to be able to 7465 ** be capable of suspending/resuming from any byte in the stream. This 7466 ** sometimes requires keeping a few trailing bytes from the last buffer around 7467 ** in the "residual" buffer. 7468 */ 7469 7470 #include <inttypes.h> 7471 #include <stddef.h> 7472 7473 #ifdef UPB_DUMP_BYTECODE 7474 #include <stdio.h> 7475 #endif 7476 7477 #define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); 7478 7479 /* Error messages that are shared between the bytecode and JIT decoders. 
*/ 7480 const char *kPbDecoderStackOverflow = "Nesting too deep."; 7481 const char *kPbDecoderSubmessageTooLong = 7482 "Submessage end extends past enclosing submessage."; 7483 7484 /* Error messages shared within this file. */ 7485 static const char *kUnterminatedVarint = "Unterminated varint."; 7486 7487 /* upb_pbdecoder **************************************************************/ 7488 7489 static opcode halt = OP_HALT; 7490 7491 /* Whether an op consumes any of the input buffer. */ 7492 static bool consumes_input(opcode op) { 7493 switch (op) { 7494 case OP_SETDISPATCH: 7495 case OP_STARTMSG: 7496 case OP_ENDMSG: 7497 case OP_STARTSEQ: 7498 case OP_ENDSEQ: 7499 case OP_STARTSUBMSG: 7500 case OP_ENDSUBMSG: 7501 case OP_STARTSTR: 7502 case OP_ENDSTR: 7503 case OP_PUSHTAGDELIM: 7504 case OP_POP: 7505 case OP_SETDELIM: 7506 case OP_SETBIGGROUPNUM: 7507 case OP_CHECKDELIM: 7508 case OP_CALL: 7509 case OP_RET: 7510 case OP_BRANCH: 7511 return false; 7512 default: 7513 return true; 7514 } 7515 } 7516 7517 static size_t stacksize(upb_pbdecoder *d, size_t entries) { 7518 UPB_UNUSED(d); 7519 return entries * sizeof(upb_pbdecoder_frame); 7520 } 7521 7522 static size_t callstacksize(upb_pbdecoder *d, size_t entries) { 7523 UPB_UNUSED(d); 7524 7525 #ifdef UPB_USE_JIT_X64 7526 if (d->method_->is_native_) { 7527 /* Each native stack frame needs two pointers, plus we need a few frames for 7528 * the enter/exit trampolines. */ 7529 size_t ret = entries * sizeof(void*) * 2; 7530 ret += sizeof(void*) * 10; 7531 return ret; 7532 } 7533 #endif 7534 7535 return entries * sizeof(uint32_t*); 7536 } 7537 7538 7539 static bool in_residual_buf(const upb_pbdecoder *d, const char *p); 7540 7541 /* It's unfortunate that we have to micro-manage the compiler with 7542 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily 7543 * specific to one hardware configuration. But empirically on a Core i7, 7544 * performance increases 30-50% with these annotations. 
Every instance where 7545 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in 7546 * benchmarks. */ 7547 7548 static void seterr(upb_pbdecoder *d, const char *msg) { 7549 upb_status status = UPB_STATUS_INIT; 7550 upb_status_seterrmsg(&status, msg); 7551 upb_env_reporterror(d->env, &status); 7552 } 7553 7554 void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { 7555 seterr(d, msg); 7556 } 7557 7558 7559 /* Buffering ******************************************************************/ 7560 7561 /* We operate on one buffer at a time, which is either the user's buffer passed 7562 * to our "decode" callback or some residual bytes from the previous buffer. */ 7563 7564 /* How many bytes can be safely read from d->ptr without reading past end-of-buf 7565 * or past the current delimited end. */ 7566 static size_t curbufleft(const upb_pbdecoder *d) { 7567 assert(d->data_end >= d->ptr); 7568 return d->data_end - d->ptr; 7569 } 7570 7571 /* How many bytes are available before end-of-buffer. */ 7572 static size_t bufleft(const upb_pbdecoder *d) { 7573 return d->end - d->ptr; 7574 } 7575 7576 /* Overall stream offset of d->ptr. */ 7577 uint64_t offset(const upb_pbdecoder *d) { 7578 return d->bufstart_ofs + (d->ptr - d->buf); 7579 } 7580 7581 /* How many bytes are available before the end of this delimited region. */ 7582 size_t delim_remaining(const upb_pbdecoder *d) { 7583 return d->top->end_ofs - offset(d); 7584 } 7585 7586 /* Advances d->ptr. 
*/ 7587 static void advance(upb_pbdecoder *d, size_t len) { 7588 assert(curbufleft(d) >= len); 7589 d->ptr += len; 7590 } 7591 7592 static bool in_buf(const char *p, const char *buf, const char *end) { 7593 return p >= buf && p <= end; 7594 } 7595 7596 static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { 7597 return in_buf(p, d->residual, d->residual_end); 7598 } 7599 7600 /* Calculates the delim_end value, which is affected by both the current buffer 7601 * and the parsing stack, so must be called whenever either is updated. */ 7602 static void set_delim_end(upb_pbdecoder *d) { 7603 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; 7604 if (delim_ofs <= (size_t)(d->end - d->buf)) { 7605 d->delim_end = d->buf + delim_ofs; 7606 d->data_end = d->delim_end; 7607 } else { 7608 d->data_end = d->end; 7609 d->delim_end = NULL; 7610 } 7611 } 7612 7613 static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { 7614 d->ptr = buf; 7615 d->buf = buf; 7616 d->end = end; 7617 set_delim_end(d); 7618 } 7619 7620 static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { 7621 assert(curbufleft(d) == 0); 7622 d->bufstart_ofs += (d->end - d->buf); 7623 switchtobuf(d, buf, buf + len); 7624 } 7625 7626 static void checkpoint(upb_pbdecoder *d) { 7627 /* The assertion here is in the interests of efficiency, not correctness. 7628 * We are trying to ensure that we don't checkpoint() more often than 7629 * necessary. */ 7630 assert(d->checkpoint != d->ptr); 7631 d->checkpoint = d->ptr; 7632 } 7633 7634 /* Skips "bytes" bytes in the stream, which may be more than available. If we 7635 * skip more bytes than are available, we return a long read count to the caller 7636 * indicating how many bytes can be skipped over before passing actual data 7637 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they 7638 * won't actually be read. 
7639 */ 7640 static int32_t skip(upb_pbdecoder *d, size_t bytes) { 7641 assert(!in_residual_buf(d, d->ptr) || d->size_param == 0); 7642 assert(d->skip == 0); 7643 if (bytes > delim_remaining(d)) { 7644 seterr(d, "Skipped value extended beyond enclosing submessage."); 7645 return upb_pbdecoder_suspend(d); 7646 } else if (bufleft(d) > bytes) { 7647 /* Skipped data is all in current buffer, and more is still available. */ 7648 advance(d, bytes); 7649 d->skip = 0; 7650 return DECODE_OK; 7651 } else { 7652 /* Skipped data extends beyond currently available buffers. */ 7653 d->pc = d->last; 7654 d->skip = bytes - curbufleft(d); 7655 d->bufstart_ofs += (d->end - d->buf); 7656 d->residual_end = d->residual; 7657 switchtobuf(d, d->residual, d->residual_end); 7658 return d->size_param + d->skip; 7659 } 7660 } 7661 7662 7663 /* Resumes the decoder from an initial state or from a previous suspend. */ 7664 int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, 7665 size_t size, const upb_bufhandle *handle) { 7666 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ 7667 7668 d->buf_param = buf; 7669 d->size_param = size; 7670 d->handle = handle; 7671 7672 if (d->residual_end > d->residual) { 7673 /* We have residual bytes from the last buffer. */ 7674 assert(d->ptr == d->residual); 7675 } else { 7676 switchtobuf(d, buf, buf + size); 7677 } 7678 7679 d->checkpoint = d->ptr; 7680 7681 if (d->skip) { 7682 size_t skip_bytes = d->skip; 7683 d->skip = 0; 7684 CHECK_RETURN(skip(d, skip_bytes)); 7685 d->checkpoint = d->ptr; 7686 } 7687 7688 if (!buf) { 7689 /* NULL buf is ok if its entire span is covered by the "skip" above, but 7690 * by this point we know that "skip" doesn't cover the buffer. 
*/ 7691 seterr(d, "Passed NULL buffer over non-skippable region."); 7692 return upb_pbdecoder_suspend(d); 7693 } 7694 7695 if (d->top->groupnum < 0) { 7696 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); 7697 d->checkpoint = d->ptr; 7698 } 7699 7700 return DECODE_OK; 7701 } 7702 7703 /* Suspends the decoder at the last checkpoint, without saving any residual 7704 * bytes. If there are any unconsumed bytes, returns a short byte count. */ 7705 size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { 7706 d->pc = d->last; 7707 if (d->checkpoint == d->residual) { 7708 /* Checkpoint was in residual buf; no user bytes were consumed. */ 7709 d->ptr = d->residual; 7710 return 0; 7711 } else { 7712 size_t consumed; 7713 assert(!in_residual_buf(d, d->checkpoint)); 7714 assert(d->buf == d->buf_param); 7715 7716 consumed = d->checkpoint - d->buf; 7717 d->bufstart_ofs += consumed; 7718 d->residual_end = d->residual; 7719 switchtobuf(d, d->residual, d->residual_end); 7720 return consumed; 7721 } 7722 } 7723 7724 /* Suspends the decoder at the last checkpoint, and saves any unconsumed 7725 * bytes in our residual buffer. This is necessary if we need more user 7726 * bytes to form a complete value, which might not be contiguous in the 7727 * user's buffers. Always consumes all user bytes. */ 7728 static size_t suspend_save(upb_pbdecoder *d) { 7729 /* We hit end-of-buffer before we could parse a full value. 7730 * Save any unconsumed bytes (if any) to the residual buffer. */ 7731 d->pc = d->last; 7732 7733 if (d->checkpoint == d->residual) { 7734 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */ 7735 assert((d->residual_end - d->residual) + d->size_param <= 7736 sizeof(d->residual)); 7737 if (!in_residual_buf(d, d->ptr)) { 7738 d->bufstart_ofs -= (d->residual_end - d->residual); 7739 } 7740 memcpy(d->residual_end, d->buf_param, d->size_param); 7741 d->residual_end += d->size_param; 7742 } else { 7743 /* Checkpoint was in user buf; old residual bytes not needed. 
*/ 7744 size_t save; 7745 assert(!in_residual_buf(d, d->checkpoint)); 7746 7747 d->ptr = d->checkpoint; 7748 save = curbufleft(d); 7749 assert(save <= sizeof(d->residual)); 7750 memcpy(d->residual, d->ptr, save); 7751 d->residual_end = d->residual + save; 7752 d->bufstart_ofs = offset(d); 7753 } 7754 7755 switchtobuf(d, d->residual, d->residual_end); 7756 return d->size_param; 7757 } 7758 7759 /* Copies the next "bytes" bytes into "buf" and advances the stream. 7760 * Requires that this many bytes are available in the current buffer. */ 7761 UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, 7762 size_t bytes) { 7763 assert(bytes <= curbufleft(d)); 7764 memcpy(buf, d->ptr, bytes); 7765 advance(d, bytes); 7766 } 7767 7768 /* Slow path for getting the next "bytes" bytes, regardless of whether they are 7769 * available in the current buffer or not. Returns a status code as described 7770 * in decoder.int.h. */ 7771 UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, 7772 size_t bytes) { 7773 const size_t avail = curbufleft(d); 7774 consumebytes(d, buf, avail); 7775 bytes -= avail; 7776 assert(bytes > 0); 7777 if (in_residual_buf(d, d->ptr)) { 7778 advancetobuf(d, d->buf_param, d->size_param); 7779 } 7780 if (curbufleft(d) >= bytes) { 7781 consumebytes(d, (char *)buf + avail, bytes); 7782 return DECODE_OK; 7783 } else if (d->data_end == d->delim_end) { 7784 seterr(d, "Submessage ended in the middle of a value or group"); 7785 return upb_pbdecoder_suspend(d); 7786 } else { 7787 return suspend_save(d); 7788 } 7789 } 7790 7791 /* Gets the next "bytes" bytes, regardless of whether they are available in the 7792 * current buffer or not. Returns a status code as described in decoder.int.h. 7793 */ 7794 UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, 7795 size_t bytes) { 7796 if (curbufleft(d) >= bytes) { 7797 /* Buffer has enough data to satisfy. 
*/ 7798 consumebytes(d, buf, bytes); 7799 return DECODE_OK; 7800 } else { 7801 return getbytes_slow(d, buf, bytes); 7802 } 7803 } 7804 7805 UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, 7806 size_t bytes) { 7807 size_t ret = curbufleft(d); 7808 memcpy(buf, d->ptr, ret); 7809 if (in_residual_buf(d, d->ptr)) { 7810 size_t copy = UPB_MIN(bytes - ret, d->size_param); 7811 memcpy((char *)buf + ret, d->buf_param, copy); 7812 ret += copy; 7813 } 7814 return ret; 7815 } 7816 7817 UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, 7818 size_t bytes) { 7819 if (curbufleft(d) >= bytes) { 7820 memcpy(buf, d->ptr, bytes); 7821 return bytes; 7822 } else { 7823 return peekbytes_slow(d, buf, bytes); 7824 } 7825 } 7826 7827 7828 /* Decoding of wire types *****************************************************/ 7829 7830 /* Slow path for decoding a varint from the current buffer position. 7831 * Returns a status code as described in decoder.int.h. */ 7832 UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, 7833 uint64_t *u64) { 7834 uint8_t byte = 0x80; 7835 int bitpos; 7836 *u64 = 0; 7837 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { 7838 int32_t ret = getbytes(d, &byte, 1); 7839 if (ret >= 0) return ret; 7840 *u64 |= (uint64_t)(byte & 0x7F) << bitpos; 7841 } 7842 if(bitpos == 70 && (byte & 0x80)) { 7843 seterr(d, kUnterminatedVarint); 7844 return upb_pbdecoder_suspend(d); 7845 } 7846 return DECODE_OK; 7847 } 7848 7849 /* Decodes a varint from the current buffer position. 7850 * Returns a status code as described in decoder.int.h. */ 7851 UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { 7852 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { 7853 *u64 = *d->ptr; 7854 advance(d, 1); 7855 return DECODE_OK; 7856 } else if (curbufleft(d) >= 10) { 7857 /* Fast case. 
*/ 7858 upb_decoderet r = upb_vdecode_fast(d->ptr); 7859 if (r.p == NULL) { 7860 seterr(d, kUnterminatedVarint); 7861 return upb_pbdecoder_suspend(d); 7862 } 7863 advance(d, r.p - d->ptr); 7864 *u64 = r.val; 7865 return DECODE_OK; 7866 } else { 7867 /* Slow case -- varint spans buffer seam. */ 7868 return upb_pbdecoder_decode_varint_slow(d, u64); 7869 } 7870 } 7871 7872 /* Decodes a 32-bit varint from the current buffer position. 7873 * Returns a status code as described in decoder.int.h. */ 7874 UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { 7875 uint64_t u64; 7876 int32_t ret = decode_varint(d, &u64); 7877 if (ret >= 0) return ret; 7878 if (u64 > UINT32_MAX) { 7879 seterr(d, "Unterminated 32-bit varint"); 7880 /* TODO(haberman) guarantee that this function return is >= 0 somehow, 7881 * so we know this path will always be treated as error by our caller. 7882 * Right now the size_t -> int32_t can overflow and produce negative values. 7883 */ 7884 *u32 = 0; 7885 return upb_pbdecoder_suspend(d); 7886 } 7887 *u32 = u64; 7888 return DECODE_OK; 7889 } 7890 7891 /* Decodes a fixed32 from the current buffer position. 7892 * Returns a status code as described in decoder.int.h. 7893 * TODO: proper byte swapping for big-endian machines. */ 7894 UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { 7895 return getbytes(d, u32, 4); 7896 } 7897 7898 /* Decodes a fixed64 from the current buffer position. 7899 * Returns a status code as described in decoder.int.h. 7900 * TODO: proper byte swapping for big-endian machines. */ 7901 UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { 7902 return getbytes(d, u64, 8); 7903 } 7904 7905 /* Non-static versions of the above functions. 7906 * These are called by the JIT for fallback paths. 
*/ 7907 int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) { 7908 return decode_fixed32(d, u32); 7909 } 7910 7911 int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) { 7912 return decode_fixed64(d, u64); 7913 } 7914 7915 static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } 7916 static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } 7917 7918 /* Pushes a frame onto the decoder stack. */ 7919 static bool decoder_push(upb_pbdecoder *d, uint64_t end) { 7920 upb_pbdecoder_frame *fr = d->top; 7921 7922 if (end > fr->end_ofs) { 7923 seterr(d, kPbDecoderSubmessageTooLong); 7924 return false; 7925 } else if (fr == d->limit) { 7926 seterr(d, kPbDecoderStackOverflow); 7927 return false; 7928 } 7929 7930 fr++; 7931 fr->end_ofs = end; 7932 fr->dispatch = NULL; 7933 fr->groupnum = 0; 7934 d->top = fr; 7935 return true; 7936 } 7937 7938 static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { 7939 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence 7940 * field number) prior to hitting any enclosing submessage end, pushing our 7941 * existing delim end prevents us from continuing to parse values from a 7942 * corrupt proto that doesn't give us an END tag in time. */ 7943 if (!decoder_push(d, d->top->end_ofs)) 7944 return false; 7945 d->top->groupnum = arg; 7946 return true; 7947 } 7948 7949 /* Pops a frame from the decoder stack. */ 7950 static void decoder_pop(upb_pbdecoder *d) { d->top--; } 7951 7952 UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, 7953 uint64_t expected) { 7954 uint64_t data = 0; 7955 size_t bytes = upb_value_size(expected); 7956 size_t read = peekbytes(d, &data, bytes); 7957 if (read == bytes && data == expected) { 7958 /* Advance past matched bytes. 
*/ 7959 int32_t ok = getbytes(d, &data, read); 7960 UPB_ASSERT_VAR(ok, ok < 0); 7961 return DECODE_OK; 7962 } else if (read < bytes && memcmp(&data, &expected, read) == 0) { 7963 return suspend_save(d); 7964 } else { 7965 return DECODE_MISMATCH; 7966 } 7967 } 7968 7969 int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, 7970 uint8_t wire_type) { 7971 if (fieldnum >= 0) 7972 goto have_tag; 7973 7974 while (true) { 7975 uint32_t tag; 7976 CHECK_RETURN(decode_v32(d, &tag)); 7977 wire_type = tag & 0x7; 7978 fieldnum = tag >> 3; 7979 7980 have_tag: 7981 if (fieldnum == 0) { 7982 seterr(d, "Saw invalid field number (0)"); 7983 return upb_pbdecoder_suspend(d); 7984 } 7985 7986 /* TODO: deliver to unknown field callback. */ 7987 switch (wire_type) { 7988 case UPB_WIRE_TYPE_32BIT: 7989 CHECK_RETURN(skip(d, 4)); 7990 break; 7991 case UPB_WIRE_TYPE_64BIT: 7992 CHECK_RETURN(skip(d, 8)); 7993 break; 7994 case UPB_WIRE_TYPE_VARINT: { 7995 uint64_t u64; 7996 CHECK_RETURN(decode_varint(d, &u64)); 7997 break; 7998 } 7999 case UPB_WIRE_TYPE_DELIMITED: { 8000 uint32_t len; 8001 CHECK_RETURN(decode_v32(d, &len)); 8002 CHECK_RETURN(skip(d, len)); 8003 break; 8004 } 8005 case UPB_WIRE_TYPE_START_GROUP: 8006 CHECK_SUSPEND(pushtagdelim(d, -fieldnum)); 8007 break; 8008 case UPB_WIRE_TYPE_END_GROUP: 8009 if (fieldnum == -d->top->groupnum) { 8010 decoder_pop(d); 8011 } else if (fieldnum == d->top->groupnum) { 8012 return DECODE_ENDGROUP; 8013 } else { 8014 seterr(d, "Unmatched ENDGROUP tag."); 8015 return upb_pbdecoder_suspend(d); 8016 } 8017 break; 8018 default: 8019 seterr(d, "Invalid wire type"); 8020 return upb_pbdecoder_suspend(d); 8021 } 8022 8023 if (d->top->groupnum >= 0) { 8024 return DECODE_OK; 8025 } 8026 8027 /* Unknown group -- continue looping over unknown fields. 
*/ 8028 checkpoint(d); 8029 } 8030 } 8031 8032 static void goto_endmsg(upb_pbdecoder *d) { 8033 upb_value v; 8034 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); 8035 UPB_ASSERT_VAR(found, found); 8036 d->pc = d->top->base + upb_value_getuint64(v); 8037 } 8038 8039 /* Parses a tag and jumps to the corresponding bytecode instruction for this 8040 * field. 8041 * 8042 * If the tag is unknown (or the wire type doesn't match), parses the field as 8043 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode 8044 * instruction for the end of message. */ 8045 static int32_t dispatch(upb_pbdecoder *d) { 8046 upb_inttable *dispatch = d->top->dispatch; 8047 uint32_t tag; 8048 uint8_t wire_type; 8049 uint32_t fieldnum; 8050 upb_value val; 8051 int32_t retval; 8052 8053 /* Decode tag. */ 8054 CHECK_RETURN(decode_v32(d, &tag)); 8055 wire_type = tag & 0x7; 8056 fieldnum = tag >> 3; 8057 8058 /* Lookup tag. Because of packed/non-packed compatibility, we have to 8059 * check the wire type against two possibilities. */ 8060 if (fieldnum != DISPATCH_ENDMSG && 8061 upb_inttable_lookup32(dispatch, fieldnum, &val)) { 8062 uint64_t v = upb_value_getuint64(val); 8063 if (wire_type == (v & 0xff)) { 8064 d->pc = d->top->base + (v >> 16); 8065 return DECODE_OK; 8066 } else if (wire_type == ((v >> 8) & 0xff)) { 8067 bool found = 8068 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val); 8069 UPB_ASSERT_VAR(found, found); 8070 d->pc = d->top->base + upb_value_getuint64(val); 8071 return DECODE_OK; 8072 } 8073 } 8074 8075 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG 8076 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which 8077 * we need to back up to, so that when we're done skipping unknown data we 8078 * can re-check the delimited end. 
*/ 8079 d->last--; /* Necessary if we get suspended */ 8080 d->pc = d->last; 8081 assert(getop(*d->last) == OP_CHECKDELIM); 8082 8083 /* Unknown field or ENDGROUP. */ 8084 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); 8085 8086 CHECK_RETURN(retval); 8087 8088 if (retval == DECODE_ENDGROUP) { 8089 goto_endmsg(d); 8090 return DECODE_OK; 8091 } 8092 8093 return DECODE_OK; 8094 } 8095 8096 /* Callers know that the stack is more than one deep because the opcodes that 8097 * call this only occur after PUSH operations. */ 8098 upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { 8099 assert(d->top != d->stack); 8100 return d->top - 1; 8101 } 8102 8103 8104 /* The main decoding loop *****************************************************/ 8105 8106 /* The main decoder VM function. Uses traditional bytecode dispatch loop with a 8107 * switch() statement. */ 8108 size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group, 8109 const upb_bufhandle* handle) { 8110 8111 #define VMCASE(op, code) \ 8112 case op: { code; if (consumes_input(op)) checkpoint(d); break; } 8113 #define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \ 8114 VMCASE(OP_PARSE_ ## type, { \ 8115 ctype val; \ 8116 CHECK_RETURN(decode_ ## wt(d, &val)); \ 8117 upb_sink_put ## name(&d->top->sink, arg, (convfunc)(val)); \ 8118 }) 8119 8120 while(1) { 8121 int32_t instruction; 8122 opcode op; 8123 uint32_t arg; 8124 int32_t longofs; 8125 8126 d->last = d->pc; 8127 instruction = *d->pc++; 8128 op = getop(instruction); 8129 arg = instruction >> 8; 8130 longofs = arg; 8131 assert(d->ptr != d->residual_end); 8132 UPB_UNUSED(group); 8133 #ifdef UPB_DUMP_BYTECODE 8134 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " 8135 "%x %s (%d)\n", 8136 (int)offset(d), 8137 (int)(d->ptr - d->buf), 8138 (int)(d->data_end - d->ptr), 8139 (int)(d->end - d->ptr), 8140 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)), 8141 (int)(d->pc - 1 - group->bytecode), 8142 
upb_pbdecoder_getopname(op), 8143 arg); 8144 #endif 8145 switch (op) { 8146 /* Technically, we are losing data if we see a 32-bit varint that is not 8147 * properly sign-extended. We could detect this and error about the data 8148 * loss, but proto2 does not do this, so we pass. */ 8149 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t) 8150 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t) 8151 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t) 8152 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t) 8153 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t) 8154 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t) 8155 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t) 8156 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t) 8157 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t) 8158 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t) 8159 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t) 8160 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t) 8161 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t) 8162 8163 VMCASE(OP_SETDISPATCH, 8164 d->top->base = d->pc - 1; 8165 memcpy(&d->top->dispatch, d->pc, sizeof(void*)); 8166 d->pc += sizeof(void*) / sizeof(uint32_t); 8167 ) 8168 VMCASE(OP_STARTMSG, 8169 CHECK_SUSPEND(upb_sink_startmsg(&d->top->sink)); 8170 ) 8171 VMCASE(OP_ENDMSG, 8172 CHECK_SUSPEND(upb_sink_endmsg(&d->top->sink, d->status)); 8173 ) 8174 VMCASE(OP_STARTSEQ, 8175 upb_pbdecoder_frame *outer = outer_frame(d); 8176 CHECK_SUSPEND(upb_sink_startseq(&outer->sink, arg, &d->top->sink)); 8177 ) 8178 VMCASE(OP_ENDSEQ, 8179 CHECK_SUSPEND(upb_sink_endseq(&d->top->sink, arg)); 8180 ) 8181 VMCASE(OP_STARTSUBMSG, 8182 upb_pbdecoder_frame *outer = outer_frame(d); 8183 CHECK_SUSPEND(upb_sink_startsubmsg(&outer->sink, arg, &d->top->sink)); 8184 ) 8185 VMCASE(OP_ENDSUBMSG, 8186 CHECK_SUSPEND(upb_sink_endsubmsg(&d->top->sink, arg)); 8187 ) 8188 VMCASE(OP_STARTSTR, 8189 uint32_t len = 
delim_remaining(d); 8190 upb_pbdecoder_frame *outer = outer_frame(d); 8191 CHECK_SUSPEND(upb_sink_startstr(&outer->sink, arg, len, &d->top->sink)); 8192 if (len == 0) { 8193 d->pc++; /* Skip OP_STRING. */ 8194 } 8195 ) 8196 VMCASE(OP_STRING, 8197 uint32_t len = curbufleft(d); 8198 size_t n = upb_sink_putstring(&d->top->sink, arg, d->ptr, len, handle); 8199 if (n > len) { 8200 if (n > delim_remaining(d)) { 8201 seterr(d, "Tried to skip past end of string."); 8202 return upb_pbdecoder_suspend(d); 8203 } else { 8204 int32_t ret = skip(d, n); 8205 /* This shouldn't return DECODE_OK, because n > len. */ 8206 assert(ret >= 0); 8207 return ret; 8208 } 8209 } 8210 advance(d, n); 8211 if (n < len || d->delim_end == NULL) { 8212 /* We aren't finished with this string yet. */ 8213 d->pc--; /* Repeat OP_STRING. */ 8214 if (n > 0) checkpoint(d); 8215 return upb_pbdecoder_suspend(d); 8216 } 8217 ) 8218 VMCASE(OP_ENDSTR, 8219 CHECK_SUSPEND(upb_sink_endstr(&d->top->sink, arg)); 8220 ) 8221 VMCASE(OP_PUSHTAGDELIM, 8222 CHECK_SUSPEND(pushtagdelim(d, arg)); 8223 ) 8224 VMCASE(OP_SETBIGGROUPNUM, 8225 d->top->groupnum = *d->pc++; 8226 ) 8227 VMCASE(OP_POP, 8228 assert(d->top > d->stack); 8229 decoder_pop(d); 8230 ) 8231 VMCASE(OP_PUSHLENDELIM, 8232 uint32_t len; 8233 CHECK_RETURN(decode_v32(d, &len)); 8234 CHECK_SUSPEND(decoder_push(d, offset(d) + len)); 8235 set_delim_end(d); 8236 ) 8237 VMCASE(OP_SETDELIM, 8238 set_delim_end(d); 8239 ) 8240 VMCASE(OP_CHECKDELIM, 8241 /* We are guaranteed of this assert because we never allow ourselves to 8242 * consume bytes beyond data_end, which covers delim_end when non-NULL. 
8243 */ 8244 assert(!(d->delim_end && d->ptr > d->delim_end)); 8245 if (d->ptr == d->delim_end) 8246 d->pc += longofs; 8247 ) 8248 VMCASE(OP_CALL, 8249 d->callstack[d->call_len++] = d->pc; 8250 d->pc += longofs; 8251 ) 8252 VMCASE(OP_RET, 8253 assert(d->call_len > 0); 8254 d->pc = d->callstack[--d->call_len]; 8255 ) 8256 VMCASE(OP_BRANCH, 8257 d->pc += longofs; 8258 ) 8259 VMCASE(OP_TAG1, 8260 uint8_t expected; 8261 CHECK_SUSPEND(curbufleft(d) > 0); 8262 expected = (arg >> 8) & 0xff; 8263 if (*d->ptr == expected) { 8264 advance(d, 1); 8265 } else { 8266 int8_t shortofs; 8267 badtag: 8268 shortofs = arg; 8269 if (shortofs == LABEL_DISPATCH) { 8270 CHECK_RETURN(dispatch(d)); 8271 } else { 8272 d->pc += shortofs; 8273 break; /* Avoid checkpoint(). */ 8274 } 8275 } 8276 ) 8277 VMCASE(OP_TAG2, 8278 uint16_t expected; 8279 CHECK_SUSPEND(curbufleft(d) > 0); 8280 expected = (arg >> 8) & 0xffff; 8281 if (curbufleft(d) >= 2) { 8282 uint16_t actual; 8283 memcpy(&actual, d->ptr, 2); 8284 if (expected == actual) { 8285 advance(d, 2); 8286 } else { 8287 goto badtag; 8288 } 8289 } else { 8290 int32_t result = upb_pbdecoder_checktag_slow(d, expected); 8291 if (result == DECODE_MISMATCH) goto badtag; 8292 if (result >= 0) return result; 8293 } 8294 ) 8295 VMCASE(OP_TAGN, { 8296 uint64_t expected; 8297 int32_t result; 8298 memcpy(&expected, d->pc, 8); 8299 d->pc += 2; 8300 result = upb_pbdecoder_checktag_slow(d, expected); 8301 if (result == DECODE_MISMATCH) goto badtag; 8302 if (result >= 0) return result; 8303 }) 8304 VMCASE(OP_DISPATCH, { 8305 CHECK_RETURN(dispatch(d)); 8306 }) 8307 VMCASE(OP_HALT, { 8308 return d->size_param; 8309 }) 8310 } 8311 } 8312 } 8313 8314 8315 /* BytesHandler handlers ******************************************************/ 8316 8317 void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { 8318 upb_pbdecoder *d = closure; 8319 UPB_UNUSED(size_hint); 8320 d->top->end_ofs = UINT64_MAX; 8321 d->bufstart_ofs = 0; 8322 d->call_len = 1; 
8323 d->callstack[0] = &halt; 8324 d->pc = pc; 8325 d->skip = 0; 8326 return d; 8327 } 8328 8329 void *upb_pbdecoder_startjit(void *closure, const void *hd, size_t size_hint) { 8330 upb_pbdecoder *d = closure; 8331 UPB_UNUSED(hd); 8332 UPB_UNUSED(size_hint); 8333 d->top->end_ofs = UINT64_MAX; 8334 d->bufstart_ofs = 0; 8335 d->call_len = 0; 8336 d->skip = 0; 8337 return d; 8338 } 8339 8340 bool upb_pbdecoder_end(void *closure, const void *handler_data) { 8341 upb_pbdecoder *d = closure; 8342 const upb_pbdecodermethod *method = handler_data; 8343 uint64_t end; 8344 char dummy; 8345 8346 if (d->residual_end > d->residual) { 8347 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data"); 8348 return false; 8349 } 8350 8351 if (d->skip) { 8352 seterr(d, "Unexpected EOF inside skipped data"); 8353 return false; 8354 } 8355 8356 if (d->top->end_ofs != UINT64_MAX) { 8357 seterr(d, "Unexpected EOF inside delimited string"); 8358 return false; 8359 } 8360 8361 /* The user's end() call indicates that the message ends here. */ 8362 end = offset(d); 8363 d->top->end_ofs = end; 8364 8365 #ifdef UPB_USE_JIT_X64 8366 if (method->is_native_) { 8367 const mgroup *group = (const mgroup*)method->group; 8368 if (d->top != d->stack) 8369 d->stack->end_ofs = 0; 8370 group->jit_code(closure, method->code_base.ptr, &dummy, 0, NULL); 8371 } else 8372 #endif 8373 { 8374 const uint32_t *p = d->pc; 8375 d->stack->end_ofs = end; 8376 /* Check the previous bytecode, but guard against beginning. */ 8377 if (p != method->code_base.ptr) p--; 8378 if (getop(*p) == OP_CHECKDELIM) { 8379 /* Rewind from OP_TAG* to OP_CHECKDELIM. 
*/ 8380 assert(getop(*d->pc) == OP_TAG1 || 8381 getop(*d->pc) == OP_TAG2 || 8382 getop(*d->pc) == OP_TAGN || 8383 getop(*d->pc) == OP_DISPATCH); 8384 d->pc = p; 8385 } 8386 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); 8387 } 8388 8389 if (d->call_len != 0) { 8390 seterr(d, "Unexpected EOF inside submessage or group"); 8391 return false; 8392 } 8393 8394 return true; 8395 } 8396 8397 size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf, 8398 size_t size, const upb_bufhandle *handle) { 8399 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle); 8400 8401 if (result == DECODE_ENDGROUP) goto_endmsg(decoder); 8402 CHECK_RETURN(result); 8403 8404 return run_decoder_vm(decoder, group, handle); 8405 } 8406 8407 8408 /* Public API *****************************************************************/ 8409 8410 void upb_pbdecoder_reset(upb_pbdecoder *d) { 8411 d->top = d->stack; 8412 d->top->groupnum = 0; 8413 d->ptr = d->residual; 8414 d->buf = d->residual; 8415 d->end = d->residual; 8416 d->residual_end = d->residual; 8417 } 8418 8419 upb_pbdecoder *upb_pbdecoder_create(upb_env *e, const upb_pbdecodermethod *m, 8420 upb_sink *sink) { 8421 const size_t default_max_nesting = 64; 8422 #ifndef NDEBUG 8423 size_t size_before = upb_env_bytesallocated(e); 8424 #endif 8425 8426 upb_pbdecoder *d = upb_env_malloc(e, sizeof(upb_pbdecoder)); 8427 if (!d) return NULL; 8428 8429 d->method_ = m; 8430 d->callstack = upb_env_malloc(e, callstacksize(d, default_max_nesting)); 8431 d->stack = upb_env_malloc(e, stacksize(d, default_max_nesting)); 8432 if (!d->stack || !d->callstack) { 8433 return NULL; 8434 } 8435 8436 d->env = e; 8437 d->limit = d->stack + default_max_nesting - 1; 8438 d->stack_size = default_max_nesting; 8439 8440 upb_pbdecoder_reset(d); 8441 upb_bytessink_reset(&d->input_, &m->input_handler_, d); 8442 8443 assert(sink); 8444 if (d->method_->dest_handlers_) { 8445 if (sink->handlers != d->method_->dest_handlers_) 8446 
return NULL; 8447 } 8448 upb_sink_reset(&d->top->sink, sink->handlers, sink->closure); 8449 8450 /* If this fails, increase the value in decoder.h. */ 8451 assert(upb_env_bytesallocated(e) - size_before <= UPB_PB_DECODER_SIZE); 8452 return d; 8453 } 8454 8455 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { 8456 return offset(d); 8457 } 8458 8459 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { 8460 return d->method_; 8461 } 8462 8463 upb_bytessink *upb_pbdecoder_input(upb_pbdecoder *d) { 8464 return &d->input_; 8465 } 8466 8467 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { 8468 return d->stack_size; 8469 } 8470 8471 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { 8472 assert(d->top >= d->stack); 8473 8474 if (max < (size_t)(d->top - d->stack)) { 8475 /* Can't set a limit smaller than what we are currently at. */ 8476 return false; 8477 } 8478 8479 if (max > d->stack_size) { 8480 /* Need to reallocate stack and callstack to accommodate. */ 8481 size_t old_size = stacksize(d, d->stack_size); 8482 size_t new_size = stacksize(d, max); 8483 void *p = upb_env_realloc(d->env, d->stack, old_size, new_size); 8484 if (!p) { 8485 return false; 8486 } 8487 d->stack = p; 8488 8489 old_size = callstacksize(d, d->stack_size); 8490 new_size = callstacksize(d, max); 8491 p = upb_env_realloc(d->env, d->callstack, old_size, new_size); 8492 if (!p) { 8493 return false; 8494 } 8495 d->callstack = p; 8496 8497 d->stack_size = max; 8498 } 8499 8500 d->limit = d->stack + max - 1; 8501 return true; 8502 } 8503 /* 8504 ** upb::Encoder 8505 ** 8506 ** Since we are implementing pure handlers (ie. without any out-of-band access 8507 ** to pre-computed lengths), we have to buffer all submessages before we can 8508 ** emit even their first byte. 8509 ** 8510 ** Not knowing the size of submessages also means we can't write a perfect 8511 ** zero-copy implementation, even with buffering. 
Lengths are stored as 8512 ** varints, which means that we don't know how many bytes to reserve for the 8513 ** length until we know what the length is. 8514 ** 8515 ** This leaves us with three main choices: 8516 ** 8517 ** 1. buffer all submessage data in a temporary buffer, then copy it exactly 8518 ** once into the output buffer. 8519 ** 8520 ** 2. attempt to buffer data directly into the output buffer, estimating how 8521 ** many bytes each length will take. When our guesses are wrong, use 8522 ** memmove() to grow or shrink the allotted space. 8523 ** 8524 ** 3. buffer directly into the output buffer, allocating a max length 8525 ** ahead-of-time for each submessage length. If we overallocated, we waste 8526 ** space, but no memcpy() or memmove() is required. This approach requires 8527 ** defining a maximum size for submessages and rejecting submessages that 8528 ** exceed that size. 8529 ** 8530 ** (2) and (3) have the potential to have better performance, but they are more 8531 ** complicated and subtle to implement: 8532 ** 8533 ** (3) requires making an arbitrary choice of the maximum message size; it 8534 ** wastes space when submessages are shorter than this and fails 8535 ** completely when they are longer. This makes it more finicky and 8536 ** requires configuration based on the input. It also makes it impossible 8537 ** to perfectly match the output of reference encoders that always use the 8538 ** optimal amount of space for each length. 8539 ** 8540 ** (2) requires guessing the size upfront, and if multiple lengths are 8541 ** guessed wrong the minimum required number of memmove() operations may 8542 ** be complicated to compute correctly. Implemented properly, it may have 8543 ** a useful amortized or average cost, but more investigation is required 8544 ** to determine this and what the optimal algorithm is to achieve it. 
8545 ** 8546 ** (1) makes you always pay for exactly one copy, but its implementation is 8547 ** the simplest and its performance is predictable. 8548 ** 8549 ** So for now, we implement (1) only. If we wish to optimize later, we should 8550 ** be able to do it without affecting users. 8551 ** 8552 ** The strategy is to buffer the segments of data that do *not* depend on 8553 ** unknown lengths in one buffer, and keep a separate buffer of segment pointers 8554 ** and lengths. When the top-level submessage ends, we can go beginning to end, 8555 ** alternating the writing of lengths with memcpy() of the rest of the data. 8556 ** At the top level though, no buffering is required. 8557 */ 8558 8559 8560 #include <stdlib.h> 8561 8562 /* The output buffer is divided into segments; a segment is a string of data 8563 * that is "ready to go" -- it does not need any varint lengths inserted into 8564 * the middle. The seams between segments are where varints will be inserted 8565 * once they are known. 8566 * 8567 * We also use the concept of a "run", which is a range of encoded bytes that 8568 * occur at a single submessage level. Every segment contains one or more runs. 8569 * 8570 * A segment can span messages. Consider: 8571 * 8572 * .--Submessage lengths---------. 8573 * | | | 8574 * | V V 8575 * V | |--------------- | |----------------- 8576 * Submessages: | |----------------------------------------------- 8577 * Top-level msg: ------------------------------------------------------------ 8578 * 8579 * Segments: ----- ------------------- ----------------- 8580 * Runs: *---- *--------------*--- *---------------- 8581 * (* marks the start) 8582 * 8583 * Note that the top-level message is not in any segment because it does not 8584 * have any length preceding it. 8585 * 8586 * A segment is only interrupted when another length needs to be inserted. So 8587 * observe how the second segment spans both the inner submessage and part of 8588 * the next enclosing message. 
*/ 8589 typedef struct { 8590 uint32_t msglen; /* The length to varint-encode before this segment. */ 8591 uint32_t seglen; /* Length of the segment. */ 8592 } upb_pb_encoder_segment; 8593 8594 struct upb_pb_encoder { 8595 upb_env *env; 8596 8597 /* Our input and output. */ 8598 upb_sink input_; 8599 upb_bytessink *output_; 8600 8601 /* The "subclosure" -- used as the inner closure as part of the bytessink 8602 * protocol. */ 8603 void *subc; 8604 8605 /* The output buffer and limit, and our current write position. "buf" 8606 * initially points to "initbuf", but is dynamically allocated if we need to 8607 * grow beyond the initial size. */ 8608 char *buf, *ptr, *limit; 8609 8610 /* The beginning of the current run, or undefined if we are at the top 8611 * level. */ 8612 char *runbegin; 8613 8614 /* The list of segments we are accumulating. */ 8615 upb_pb_encoder_segment *segbuf, *segptr, *seglimit; 8616 8617 /* The stack of enclosing submessages. Each entry in the stack points to the 8618 * segment where this submessage's length is being accumulated. */ 8619 int *stack, *top, *stacklimit; 8620 8621 /* Depth of startmsg/endmsg calls. */ 8622 int depth; 8623 }; 8624 8625 /* low-level buffering ********************************************************/ 8626 8627 /* Low-level functions for interacting with the output buffer. */ 8628 8629 /* TODO(haberman): handle pushback */ 8630 static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) { 8631 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL); 8632 UPB_ASSERT_VAR(n, n == len); 8633 } 8634 8635 static upb_pb_encoder_segment *top(upb_pb_encoder *e) { 8636 return &e->segbuf[*e->top]; 8637 } 8638 8639 /* Call to ensure that at least "bytes" bytes are available for writing at 8640 * e->ptr. Returns false if the bytes could not be allocated. */ 8641 static bool reserve(upb_pb_encoder *e, size_t bytes) { 8642 if ((size_t)(e->limit - e->ptr) < bytes) { 8643 /* Grow buffer. 
*/ 8644 char *new_buf; 8645 size_t needed = bytes + (e->ptr - e->buf); 8646 size_t old_size = e->limit - e->buf; 8647 8648 size_t new_size = old_size; 8649 8650 while (new_size < needed) { 8651 new_size *= 2; 8652 } 8653 8654 new_buf = upb_env_realloc(e->env, e->buf, old_size, new_size); 8655 8656 if (new_buf == NULL) { 8657 return false; 8658 } 8659 8660 e->ptr = new_buf + (e->ptr - e->buf); 8661 e->runbegin = new_buf + (e->runbegin - e->buf); 8662 e->limit = new_buf + new_size; 8663 e->buf = new_buf; 8664 } 8665 8666 return true; 8667 } 8668 8669 /* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have 8670 * previously called reserve() with at least this many bytes. */ 8671 static void encoder_advance(upb_pb_encoder *e, size_t bytes) { 8672 assert((size_t)(e->limit - e->ptr) >= bytes); 8673 e->ptr += bytes; 8674 } 8675 8676 /* Call when all of the bytes for a handler have been written. Flushes the 8677 * bytes if possible and necessary, returning false if this failed. */ 8678 static bool commit(upb_pb_encoder *e) { 8679 if (!e->top) { 8680 /* We aren't inside a delimited region. Flush our accumulated bytes to 8681 * the output. 8682 * 8683 * TODO(haberman): in the future we may want to delay flushing for 8684 * efficiency reasons. */ 8685 putbuf(e, e->buf, e->ptr - e->buf); 8686 e->ptr = e->buf; 8687 } 8688 8689 return true; 8690 } 8691 8692 /* Writes the given bytes to the buffer, handling reserve/advance. */ 8693 static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) { 8694 if (!reserve(e, len)) { 8695 return false; 8696 } 8697 8698 memcpy(e->ptr, data, len); 8699 encoder_advance(e, len); 8700 return true; 8701 } 8702 8703 /* Finish the current run by adding the run totals to the segment and message 8704 * length. 
*/ 8705 static void accumulate(upb_pb_encoder *e) { 8706 size_t run_len; 8707 assert(e->ptr >= e->runbegin); 8708 run_len = e->ptr - e->runbegin; 8709 e->segptr->seglen += run_len; 8710 top(e)->msglen += run_len; 8711 e->runbegin = e->ptr; 8712 } 8713 8714 /* Call to indicate the start of delimited region for which the full length is 8715 * not yet known. All data will be buffered until the length is known. 8716 * Delimited regions may be nested; their lengths will all be tracked properly. */ 8717 static bool start_delim(upb_pb_encoder *e) { 8718 if (e->top) { 8719 /* We are already buffering, advance to the next segment and push it on the 8720 * stack. */ 8721 accumulate(e); 8722 8723 if (++e->top == e->stacklimit) { 8724 /* TODO(haberman): grow stack? */ 8725 return false; 8726 } 8727 8728 if (++e->segptr == e->seglimit) { 8729 /* Grow segment buffer. */ 8730 size_t old_size = 8731 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); 8732 size_t new_size = old_size * 2; 8733 upb_pb_encoder_segment *new_buf = 8734 upb_env_realloc(e->env, e->segbuf, old_size, new_size); 8735 8736 if (new_buf == NULL) { 8737 return false; 8738 } 8739 8740 e->segptr = new_buf + (e->segptr - e->segbuf); 8741 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); 8742 e->segbuf = new_buf; 8743 } 8744 } else { 8745 /* We were previously at the top level, start buffering. */ 8746 e->segptr = e->segbuf; 8747 e->top = e->stack; 8748 e->runbegin = e->ptr; 8749 } 8750 8751 *e->top = e->segptr - e->segbuf; 8752 e->segptr->seglen = 0; 8753 e->segptr->msglen = 0; 8754 8755 return true; 8756 } 8757 8758 /* Call to indicate the end of a delimited region. We now know the length of 8759 * the delimited region. If we are not nested inside any other delimited 8760 * regions, we can now emit all of the buffered data we accumulated. 
*/ 8761 static bool end_delim(upb_pb_encoder *e) { 8762 size_t msglen; 8763 accumulate(e); 8764 msglen = top(e)->msglen; 8765 8766 if (e->top == e->stack) { 8767 /* All lengths are now available, emit all buffered data. */ 8768 char buf[UPB_PB_VARINT_MAX_LEN]; 8769 upb_pb_encoder_segment *s; 8770 const char *ptr = e->buf; 8771 for (s = e->segbuf; s <= e->segptr; s++) { 8772 size_t lenbytes = upb_vencode64(s->msglen, buf); 8773 putbuf(e, buf, lenbytes); 8774 putbuf(e, ptr, s->seglen); 8775 ptr += s->seglen; 8776 } 8777 8778 e->ptr = e->buf; 8779 e->top = NULL; 8780 } else { 8781 /* Need to keep buffering; propagate length info into enclosing 8782 * submessages. */ 8783 --e->top; 8784 top(e)->msglen += msglen + upb_varint_size(msglen); 8785 } 8786 8787 return true; 8788 } 8789 8790 8791 /* tag_t **********************************************************************/ 8792 8793 /* A precomputed (pre-encoded) tag and length. */ 8794 8795 typedef struct { 8796 uint8_t bytes; 8797 char tag[7]; 8798 } tag_t; 8799 8800 /* Allocates a new tag for this field, and sets it in these handlerattr. */ 8801 static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt, 8802 upb_handlerattr *attr) { 8803 uint32_t n = upb_fielddef_number(f); 8804 8805 tag_t *tag = malloc(sizeof(tag_t)); 8806 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag); 8807 8808 upb_handlerattr_init(attr); 8809 upb_handlerattr_sethandlerdata(attr, tag); 8810 upb_handlers_addcleanup(h, tag, free); 8811 } 8812 8813 static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) { 8814 return encode_bytes(e, tag->tag, tag->bytes); 8815 } 8816 8817 8818 /* encoding of wire types *****************************************************/ 8819 8820 static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) { 8821 /* TODO(haberman): byte-swap for big endian. 
*/ 8822 return encode_bytes(e, &val, sizeof(uint64_t)); 8823 } 8824 8825 static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) { 8826 /* TODO(haberman): byte-swap for big endian. */ 8827 return encode_bytes(e, &val, sizeof(uint32_t)); 8828 } 8829 8830 static bool encode_varint(upb_pb_encoder *e, uint64_t val) { 8831 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) { 8832 return false; 8833 } 8834 8835 encoder_advance(e, upb_vencode64(val, e->ptr)); 8836 return true; 8837 } 8838 8839 static uint64_t dbl2uint64(double d) { 8840 uint64_t ret; 8841 memcpy(&ret, &d, sizeof(uint64_t)); 8842 return ret; 8843 } 8844 8845 static uint32_t flt2uint32(float d) { 8846 uint32_t ret; 8847 memcpy(&ret, &d, sizeof(uint32_t)); 8848 return ret; 8849 } 8850 8851 8852 /* encoding of proto types ****************************************************/ 8853 8854 static bool startmsg(void *c, const void *hd) { 8855 upb_pb_encoder *e = c; 8856 UPB_UNUSED(hd); 8857 if (e->depth++ == 0) { 8858 upb_bytessink_start(e->output_, 0, &e->subc); 8859 } 8860 return true; 8861 } 8862 8863 static bool endmsg(void *c, const void *hd, upb_status *status) { 8864 upb_pb_encoder *e = c; 8865 UPB_UNUSED(hd); 8866 UPB_UNUSED(status); 8867 if (--e->depth == 0) { 8868 upb_bytessink_end(e->output_); 8869 } 8870 return true; 8871 } 8872 8873 static void *encode_startdelimfield(void *c, const void *hd) { 8874 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c); 8875 return ok ? c : UPB_BREAK; 8876 } 8877 8878 static bool encode_enddelimfield(void *c, const void *hd) { 8879 UPB_UNUSED(hd); 8880 return end_delim(c); 8881 } 8882 8883 static void *encode_startgroup(void *c, const void *hd) { 8884 return (encode_tag(c, hd) && commit(c)) ? 
c : UPB_BREAK; 8885 } 8886 8887 static bool encode_endgroup(void *c, const void *hd) { 8888 return encode_tag(c, hd) && commit(c); 8889 } 8890 8891 static void *encode_startstr(void *c, const void *hd, size_t size_hint) { 8892 UPB_UNUSED(size_hint); 8893 return encode_startdelimfield(c, hd); 8894 } 8895 8896 static size_t encode_strbuf(void *c, const void *hd, const char *buf, 8897 size_t len, const upb_bufhandle *h) { 8898 UPB_UNUSED(hd); 8899 UPB_UNUSED(h); 8900 return encode_bytes(c, buf, len) ? len : 0; 8901 } 8902 8903 #define T(type, ctype, convert, encode) \ 8904 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \ 8905 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \ 8906 } \ 8907 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \ 8908 UPB_UNUSED(hd); \ 8909 return encode(e, (convert)(val)); \ 8910 } 8911 8912 T(double, double, dbl2uint64, encode_fixed64) 8913 T(float, float, flt2uint32, encode_fixed32) 8914 T(int64, int64_t, uint64_t, encode_varint) 8915 T(int32, int32_t, uint32_t, encode_varint) 8916 T(fixed64, uint64_t, uint64_t, encode_fixed64) 8917 T(fixed32, uint32_t, uint32_t, encode_fixed32) 8918 T(bool, bool, bool, encode_varint) 8919 T(uint32, uint32_t, uint32_t, encode_varint) 8920 T(uint64, uint64_t, uint64_t, encode_varint) 8921 T(enum, int32_t, uint32_t, encode_varint) 8922 T(sfixed32, int32_t, uint32_t, encode_fixed32) 8923 T(sfixed64, int64_t, uint64_t, encode_fixed64) 8924 T(sint32, int32_t, upb_zzenc_32, encode_varint) 8925 T(sint64, int64_t, upb_zzenc_64, encode_varint) 8926 8927 #undef T 8928 8929 8930 /* code to build the handlers *************************************************/ 8931 8932 static void newhandlers_callback(const void *closure, upb_handlers *h) { 8933 const upb_msgdef *m; 8934 upb_msg_field_iter i; 8935 8936 UPB_UNUSED(closure); 8937 8938 upb_handlers_setstartmsg(h, startmsg, NULL); 8939 upb_handlers_setendmsg(h, endmsg, NULL); 8940 8941 m = 
upb_handlers_msgdef(h); 8942 for(upb_msg_field_begin(&i, m); 8943 !upb_msg_field_done(&i); 8944 upb_msg_field_next(&i)) { 8945 const upb_fielddef *f = upb_msg_iter_field(&i); 8946 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && 8947 upb_fielddef_packed(f); 8948 upb_handlerattr attr; 8949 upb_wiretype_t wt = 8950 packed ? UPB_WIRE_TYPE_DELIMITED 8951 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; 8952 8953 /* Pre-encode the tag for this field. */ 8954 new_tag(h, f, wt, &attr); 8955 8956 if (packed) { 8957 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr); 8958 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr); 8959 } 8960 8961 #define T(upper, lower, upbtype) \ 8962 case UPB_DESCRIPTOR_TYPE_##upper: \ 8963 if (packed) { \ 8964 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \ 8965 } else { \ 8966 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \ 8967 } \ 8968 break; 8969 8970 switch (upb_fielddef_descriptortype(f)) { 8971 T(DOUBLE, double, double); 8972 T(FLOAT, float, float); 8973 T(INT64, int64, int64); 8974 T(INT32, int32, int32); 8975 T(FIXED64, fixed64, uint64); 8976 T(FIXED32, fixed32, uint32); 8977 T(BOOL, bool, bool); 8978 T(UINT32, uint32, uint32); 8979 T(UINT64, uint64, uint64); 8980 T(ENUM, enum, int32); 8981 T(SFIXED32, sfixed32, int32); 8982 T(SFIXED64, sfixed64, int64); 8983 T(SINT32, sint32, int32); 8984 T(SINT64, sint64, int64); 8985 case UPB_DESCRIPTOR_TYPE_STRING: 8986 case UPB_DESCRIPTOR_TYPE_BYTES: 8987 upb_handlers_setstartstr(h, f, encode_startstr, &attr); 8988 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr); 8989 upb_handlers_setstring(h, f, encode_strbuf, &attr); 8990 break; 8991 case UPB_DESCRIPTOR_TYPE_MESSAGE: 8992 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr); 8993 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr); 8994 break; 8995 case UPB_DESCRIPTOR_TYPE_GROUP: { 8996 /* Endgroup takes a different tag (wire_type = 
END_GROUP). */ 8997 upb_handlerattr attr2; 8998 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2); 8999 9000 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr); 9001 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2); 9002 9003 upb_handlerattr_uninit(&attr2); 9004 break; 9005 } 9006 } 9007 9008 #undef T 9009 9010 upb_handlerattr_uninit(&attr); 9011 } 9012 } 9013 9014 void upb_pb_encoder_reset(upb_pb_encoder *e) { 9015 e->segptr = NULL; 9016 e->top = NULL; 9017 e->depth = 0; 9018 } 9019 9020 9021 /* public API *****************************************************************/ 9022 9023 const upb_handlers *upb_pb_encoder_newhandlers(const upb_msgdef *m, 9024 const void *owner) { 9025 return upb_handlers_newfrozen(m, owner, newhandlers_callback, NULL); 9026 } 9027 9028 upb_pb_encoder *upb_pb_encoder_create(upb_env *env, const upb_handlers *h, 9029 upb_bytessink *output) { 9030 const size_t initial_bufsize = 256; 9031 const size_t initial_segbufsize = 16; 9032 /* TODO(haberman): make this configurable. */ 9033 const size_t stack_size = 64; 9034 #ifndef NDEBUG 9035 const size_t size_before = upb_env_bytesallocated(env); 9036 #endif 9037 9038 upb_pb_encoder *e = upb_env_malloc(env, sizeof(upb_pb_encoder)); 9039 if (!e) return NULL; 9040 9041 e->buf = upb_env_malloc(env, initial_bufsize); 9042 e->segbuf = upb_env_malloc(env, initial_segbufsize * sizeof(*e->segbuf)); 9043 e->stack = upb_env_malloc(env, stack_size * sizeof(*e->stack)); 9044 9045 if (!e->buf || !e->segbuf || !e->stack) { 9046 return NULL; 9047 } 9048 9049 e->limit = e->buf + initial_bufsize; 9050 e->seglimit = e->segbuf + initial_segbufsize; 9051 e->stacklimit = e->stack + stack_size; 9052 9053 upb_pb_encoder_reset(e); 9054 upb_sink_reset(&e->input_, h, e); 9055 9056 e->env = env; 9057 e->output_ = output; 9058 e->subc = output->closure; 9059 e->ptr = e->buf; 9060 9061 /* If this fails, increase the value in encoder.h. 
*/ 9062 assert(upb_env_bytesallocated(env) - size_before <= UPB_PB_ENCODER_SIZE); 9063 return e; 9064 } 9065 9066 upb_sink *upb_pb_encoder_input(upb_pb_encoder *e) { return &e->input_; } 9067 9068 9069 #include <stdio.h> 9070 #include <stdlib.h> 9071 #include <string.h> 9072 9073 upb_def **upb_load_defs_from_descriptor(const char *str, size_t len, int *n, 9074 void *owner, upb_status *status) { 9075 /* Create handlers. */ 9076 const upb_pbdecodermethod *decoder_m; 9077 const upb_handlers *reader_h = upb_descreader_newhandlers(&reader_h); 9078 upb_env env; 9079 upb_pbdecodermethodopts opts; 9080 upb_pbdecoder *decoder; 9081 upb_descreader *reader; 9082 bool ok; 9083 upb_def **ret = NULL; 9084 upb_def **defs; 9085 9086 upb_pbdecodermethodopts_init(&opts, reader_h); 9087 decoder_m = upb_pbdecodermethod_new(&opts, &decoder_m); 9088 9089 upb_env_init(&env); 9090 upb_env_reporterrorsto(&env, status); 9091 9092 reader = upb_descreader_create(&env, reader_h); 9093 decoder = upb_pbdecoder_create(&env, decoder_m, upb_descreader_input(reader)); 9094 9095 /* Push input data. 
*/ 9096 ok = upb_bufsrc_putbuf(str, len, upb_pbdecoder_input(decoder)); 9097 9098 if (!ok) goto cleanup; 9099 defs = upb_descreader_getdefs(reader, owner, n); 9100 ret = malloc(sizeof(upb_def*) * (*n)); 9101 memcpy(ret, defs, sizeof(upb_def*) * (*n)); 9102 9103 cleanup: 9104 upb_env_uninit(&env); 9105 upb_handlers_unref(reader_h, &reader_h); 9106 upb_pbdecodermethod_unref(decoder_m, &decoder_m); 9107 return ret; 9108 } 9109 9110 bool upb_load_descriptor_into_symtab(upb_symtab *s, const char *str, size_t len, 9111 upb_status *status) { 9112 int n; 9113 bool success; 9114 upb_def **defs = upb_load_defs_from_descriptor(str, len, &n, &defs, status); 9115 if (!defs) return false; 9116 success = upb_symtab_add(s, defs, n, &defs, status); 9117 free(defs); 9118 return success; 9119 } 9120 9121 char *upb_readfile(const char *filename, size_t *len) { 9122 long size; 9123 char *buf; 9124 FILE *f = fopen(filename, "rb"); 9125 if(!f) return NULL; 9126 if(fseek(f, 0, SEEK_END) != 0) goto error; 9127 size = ftell(f); 9128 if(size < 0) goto error; 9129 if(fseek(f, 0, SEEK_SET) != 0) goto error; 9130 buf = malloc(size + 1); 9131 if(size && fread(buf, size, 1, f) != 1) goto error; 9132 fclose(f); 9133 if (len) *len = size; 9134 return buf; 9135 9136 error: 9137 fclose(f); 9138 return NULL; 9139 } 9140 9141 bool upb_load_descriptor_file_into_symtab(upb_symtab *symtab, const char *fname, 9142 upb_status *status) { 9143 size_t len; 9144 bool success; 9145 char *data = upb_readfile(fname, &len); 9146 if (!data) { 9147 if (status) upb_status_seterrf(status, "Couldn't read file: %s", fname); 9148 return false; 9149 } 9150 success = upb_load_descriptor_into_symtab(symtab, data, len, status); 9151 free(data); 9152 return success; 9153 } 9154 /* 9155 * upb::pb::TextPrinter 9156 * 9157 * OPT: This is not optimized at all. It uses printf() which parses the format 9158 * string every time, and it allocates memory for every put. 
9159 */ 9160 9161 9162 #include <ctype.h> 9163 #include <float.h> 9164 #include <inttypes.h> 9165 #include <stdarg.h> 9166 #include <stdio.h> 9167 #include <stdlib.h> 9168 #include <string.h> 9169 9170 9171 struct upb_textprinter { 9172 upb_sink input_; 9173 upb_bytessink *output_; 9174 int indent_depth_; 9175 bool single_line_; 9176 void *subc; 9177 }; 9178 9179 #define CHECK(x) if ((x) < 0) goto err; 9180 9181 static const char *shortname(const char *longname) { 9182 const char *last = strrchr(longname, '.'); 9183 return last ? last + 1 : longname; 9184 } 9185 9186 static int indent(upb_textprinter *p) { 9187 int i; 9188 if (!p->single_line_) 9189 for (i = 0; i < p->indent_depth_; i++) 9190 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL); 9191 return 0; 9192 } 9193 9194 static int endfield(upb_textprinter *p) { 9195 const char ch = (p->single_line_ ? ' ' : '\n'); 9196 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL); 9197 return 0; 9198 } 9199 9200 static int putescaped(upb_textprinter *p, const char *buf, size_t len, 9201 bool preserve_utf8) { 9202 /* Based on CEscapeInternal() from Google's protobuf release. */ 9203 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); 9204 const char *end = buf + len; 9205 9206 /* I think hex is prettier and more useful, but proto2 uses octal; should 9207 * investigate whether it can parse hex also. 
*/ 9208 const bool use_hex = false; 9209 bool last_hex_escape = false; /* true if last output char was \xNN */ 9210 9211 for (; buf < end; buf++) { 9212 bool is_hex_escape; 9213 9214 if (dstend - dst < 4) { 9215 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); 9216 dst = dstbuf; 9217 } 9218 9219 is_hex_escape = false; 9220 switch (*buf) { 9221 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; 9222 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; 9223 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; 9224 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break; 9225 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; 9226 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; 9227 default: 9228 /* Note that if we emit \xNN and the buf character after that is a hex 9229 * digit then that digit must be escaped too to prevent it being 9230 * interpreted as part of the character code by C. */ 9231 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && 9232 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { 9233 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); 9234 is_hex_escape = use_hex; 9235 dst += 4; 9236 } else { 9237 *(dst++) = *buf; break; 9238 } 9239 } 9240 last_hex_escape = is_hex_escape; 9241 } 9242 /* Flush remaining data. */ 9243 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); 9244 return 0; 9245 } 9246 9247 bool putf(upb_textprinter *p, const char *fmt, ...) { 9248 va_list args; 9249 va_list args_copy; 9250 char *str; 9251 int written; 9252 int len; 9253 bool ok; 9254 9255 va_start(args, fmt); 9256 9257 /* Run once to get the length of the string. */ 9258 _upb_va_copy(args_copy, args); 9259 len = _upb_vsnprintf(NULL, 0, fmt, args_copy); 9260 va_end(args_copy); 9261 9262 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). 
*/ 9263 str = malloc(len + 1); 9264 if (!str) return false; 9265 written = vsprintf(str, fmt, args); 9266 va_end(args); 9267 UPB_ASSERT_VAR(written, written == len); 9268 9269 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL); 9270 free(str); 9271 return ok; 9272 } 9273 9274 9275 /* handlers *******************************************************************/ 9276 9277 static bool textprinter_startmsg(void *c, const void *hd) { 9278 upb_textprinter *p = c; 9279 UPB_UNUSED(hd); 9280 if (p->indent_depth_ == 0) { 9281 upb_bytessink_start(p->output_, 0, &p->subc); 9282 } 9283 return true; 9284 } 9285 9286 static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) { 9287 upb_textprinter *p = c; 9288 UPB_UNUSED(hd); 9289 UPB_UNUSED(s); 9290 if (p->indent_depth_ == 0) { 9291 upb_bytessink_end(p->output_); 9292 } 9293 return true; 9294 } 9295 9296 #define TYPE(name, ctype, fmt) \ 9297 static bool textprinter_put ## name(void *closure, const void *handler_data, \ 9298 ctype val) { \ 9299 upb_textprinter *p = closure; \ 9300 const upb_fielddef *f = handler_data; \ 9301 CHECK(indent(p)); \ 9302 putf(p, "%s: " fmt, upb_fielddef_name(f), val); \ 9303 CHECK(endfield(p)); \ 9304 return true; \ 9305 err: \ 9306 return false; \ 9307 } 9308 9309 static bool textprinter_putbool(void *closure, const void *handler_data, 9310 bool val) { 9311 upb_textprinter *p = closure; 9312 const upb_fielddef *f = handler_data; 9313 CHECK(indent(p)); 9314 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false"); 9315 CHECK(endfield(p)); 9316 return true; 9317 err: 9318 return false; 9319 } 9320 9321 #define STRINGIFY_HELPER(x) #x 9322 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) 9323 9324 TYPE(int32, int32_t, "%" PRId32) 9325 TYPE(int64, int64_t, "%" PRId64) 9326 TYPE(uint32, uint32_t, "%" PRIu32) 9327 TYPE(uint64, uint64_t, "%" PRIu64) 9328 TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") 9329 TYPE(double, double, "%." 
STRINGIFY_MACROVAL(DBL_DIG) "g") 9330 9331 #undef TYPE 9332 9333 /* Output a symbolic value from the enum if found, else just print as int32. */ 9334 static bool textprinter_putenum(void *closure, const void *handler_data, 9335 int32_t val) { 9336 upb_textprinter *p = closure; 9337 const upb_fielddef *f = handler_data; 9338 const upb_enumdef *enum_def = upb_downcast_enumdef(upb_fielddef_subdef(f)); 9339 const char *label = upb_enumdef_iton(enum_def, val); 9340 if (label) { 9341 indent(p); 9342 putf(p, "%s: %s", upb_fielddef_name(f), label); 9343 endfield(p); 9344 } else { 9345 if (!textprinter_putint32(closure, handler_data, val)) 9346 return false; 9347 } 9348 return true; 9349 } 9350 9351 static void *textprinter_startstr(void *closure, const void *handler_data, 9352 size_t size_hint) { 9353 upb_textprinter *p = closure; 9354 const upb_fielddef *f = handler_data; 9355 UPB_UNUSED(size_hint); 9356 indent(p); 9357 putf(p, "%s: \"", upb_fielddef_name(f)); 9358 return p; 9359 } 9360 9361 static bool textprinter_endstr(void *closure, const void *handler_data) { 9362 upb_textprinter *p = closure; 9363 UPB_UNUSED(handler_data); 9364 putf(p, "\""); 9365 endfield(p); 9366 return true; 9367 } 9368 9369 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf, 9370 size_t len, const upb_bufhandle *handle) { 9371 upb_textprinter *p = closure; 9372 const upb_fielddef *f = hd; 9373 UPB_UNUSED(handle); 9374 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); 9375 return len; 9376 err: 9377 return 0; 9378 } 9379 9380 static void *textprinter_startsubmsg(void *closure, const void *handler_data) { 9381 upb_textprinter *p = closure; 9382 const char *name = handler_data; 9383 CHECK(indent(p)); 9384 putf(p, "%s {%c", name, p->single_line_ ? 
' ' : '\n'); 9385 p->indent_depth_++; 9386 return p; 9387 err: 9388 return UPB_BREAK; 9389 } 9390 9391 static bool textprinter_endsubmsg(void *closure, const void *handler_data) { 9392 upb_textprinter *p = closure; 9393 UPB_UNUSED(handler_data); 9394 p->indent_depth_--; 9395 CHECK(indent(p)); 9396 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL); 9397 CHECK(endfield(p)); 9398 return true; 9399 err: 9400 return false; 9401 } 9402 9403 static void onmreg(const void *c, upb_handlers *h) { 9404 const upb_msgdef *m = upb_handlers_msgdef(h); 9405 upb_msg_field_iter i; 9406 UPB_UNUSED(c); 9407 9408 upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); 9409 upb_handlers_setendmsg(h, textprinter_endmsg, NULL); 9410 9411 for(upb_msg_field_begin(&i, m); 9412 !upb_msg_field_done(&i); 9413 upb_msg_field_next(&i)) { 9414 upb_fielddef *f = upb_msg_iter_field(&i); 9415 upb_handlerattr attr = UPB_HANDLERATTR_INITIALIZER; 9416 upb_handlerattr_sethandlerdata(&attr, f); 9417 switch (upb_fielddef_type(f)) { 9418 case UPB_TYPE_INT32: 9419 upb_handlers_setint32(h, f, textprinter_putint32, &attr); 9420 break; 9421 case UPB_TYPE_INT64: 9422 upb_handlers_setint64(h, f, textprinter_putint64, &attr); 9423 break; 9424 case UPB_TYPE_UINT32: 9425 upb_handlers_setuint32(h, f, textprinter_putuint32, &attr); 9426 break; 9427 case UPB_TYPE_UINT64: 9428 upb_handlers_setuint64(h, f, textprinter_putuint64, &attr); 9429 break; 9430 case UPB_TYPE_FLOAT: 9431 upb_handlers_setfloat(h, f, textprinter_putfloat, &attr); 9432 break; 9433 case UPB_TYPE_DOUBLE: 9434 upb_handlers_setdouble(h, f, textprinter_putdouble, &attr); 9435 break; 9436 case UPB_TYPE_BOOL: 9437 upb_handlers_setbool(h, f, textprinter_putbool, &attr); 9438 break; 9439 case UPB_TYPE_STRING: 9440 case UPB_TYPE_BYTES: 9441 upb_handlers_setstartstr(h, f, textprinter_startstr, &attr); 9442 upb_handlers_setstring(h, f, textprinter_putstr, &attr); 9443 upb_handlers_setendstr(h, f, textprinter_endstr, &attr); 9444 break; 9445 case 
UPB_TYPE_MESSAGE: { 9446 const char *name = 9447 upb_fielddef_istagdelim(f) 9448 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f))) 9449 : upb_fielddef_name(f); 9450 upb_handlerattr_sethandlerdata(&attr, name); 9451 upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr); 9452 upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr); 9453 break; 9454 } 9455 case UPB_TYPE_ENUM: 9456 upb_handlers_setint32(h, f, textprinter_putenum, &attr); 9457 break; 9458 } 9459 } 9460 } 9461 9462 static void textprinter_reset(upb_textprinter *p, bool single_line) { 9463 p->single_line_ = single_line; 9464 p->indent_depth_ = 0; 9465 } 9466 9467 9468 /* Public API *****************************************************************/ 9469 9470 upb_textprinter *upb_textprinter_create(upb_env *env, const upb_handlers *h, 9471 upb_bytessink *output) { 9472 upb_textprinter *p = upb_env_malloc(env, sizeof(upb_textprinter)); 9473 if (!p) return NULL; 9474 9475 p->output_ = output; 9476 upb_sink_reset(&p->input_, h, p); 9477 textprinter_reset(p, false); 9478 9479 return p; 9480 } 9481 9482 const upb_handlers *upb_textprinter_newhandlers(const upb_msgdef *m, 9483 const void *owner) { 9484 return upb_handlers_newfrozen(m, owner, &onmreg, NULL); 9485 } 9486 9487 upb_sink *upb_textprinter_input(upb_textprinter *p) { return &p->input_; } 9488 9489 void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { 9490 p->single_line_ = single_line; 9491 } 9492 9493 9494 /* Index is descriptor type. 
*/ 9495 const uint8_t upb_pb_native_wire_types[] = { 9496 UPB_WIRE_TYPE_END_GROUP, /* ENDGROUP */ 9497 UPB_WIRE_TYPE_64BIT, /* DOUBLE */ 9498 UPB_WIRE_TYPE_32BIT, /* FLOAT */ 9499 UPB_WIRE_TYPE_VARINT, /* INT64 */ 9500 UPB_WIRE_TYPE_VARINT, /* UINT64 */ 9501 UPB_WIRE_TYPE_VARINT, /* INT32 */ 9502 UPB_WIRE_TYPE_64BIT, /* FIXED64 */ 9503 UPB_WIRE_TYPE_32BIT, /* FIXED32 */ 9504 UPB_WIRE_TYPE_VARINT, /* BOOL */ 9505 UPB_WIRE_TYPE_DELIMITED, /* STRING */ 9506 UPB_WIRE_TYPE_START_GROUP, /* GROUP */ 9507 UPB_WIRE_TYPE_DELIMITED, /* MESSAGE */ 9508 UPB_WIRE_TYPE_DELIMITED, /* BYTES */ 9509 UPB_WIRE_TYPE_VARINT, /* UINT32 */ 9510 UPB_WIRE_TYPE_VARINT, /* ENUM */ 9511 UPB_WIRE_TYPE_32BIT, /* SFIXED32 */ 9512 UPB_WIRE_TYPE_64BIT, /* SFIXED64 */ 9513 UPB_WIRE_TYPE_VARINT, /* SINT32 */ 9514 UPB_WIRE_TYPE_VARINT, /* SINT64 */ 9515 }; 9516 9517 /* A basic branch-based decoder, uses 32-bit values to get good performance 9518 * on 32-bit architectures (but performs well on 64-bits also). 9519 * This scheme comes from the original Google Protobuf implementation 9520 * (proto2). 
*/ 9521 upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { 9522 upb_decoderet err = {NULL, 0}; 9523 const char *p = r.p; 9524 uint32_t low = (uint32_t)r.val; 9525 uint32_t high = 0; 9526 uint32_t b; 9527 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; 9528 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; 9529 b = *(p++); low |= (b & 0x7fU) << 28; 9530 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; 9531 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; 9532 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; 9533 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; 9534 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; 9535 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; 9536 return err; 9537 9538 done: 9539 r.val = ((uint64_t)high << 32) | low; 9540 r.p = p; 9541 return r; 9542 } 9543 9544 /* Like the previous, but uses 64-bit values. */ 9545 upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { 9546 const char *p = r.p; 9547 uint64_t val = r.val; 9548 uint64_t b; 9549 upb_decoderet err = {NULL, 0}; 9550 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; 9551 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; 9552 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; 9553 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; 9554 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; 9555 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; 9556 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; 9557 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; 9558 return err; 9559 9560 done: 9561 r.val = val; 9562 r.p = p; 9563 return r; 9564 } 9565 9566 /* Given an encoded varint v, returns an integer with a single bit set that 9567 * indicates the end of the varint. 
Subtracting one from this value will 9568 * yield a mask that leaves only bits that are part of the varint. Returns 9569 * 0 if the varint is unterminated. */ 9570 static uint64_t upb_get_vstopbit(uint64_t v) { 9571 uint64_t cbits = v | 0x7f7f7f7f7f7f7f7fULL; 9572 return ~cbits & (cbits+1); 9573 } 9574 9575 /* A branchless decoder. Credit to Pascal Massimino for the bit-twiddling. */ 9576 upb_decoderet upb_vdecode_max8_massimino(upb_decoderet r) { 9577 uint64_t b; 9578 uint64_t stop_bit; 9579 upb_decoderet my_r; 9580 memcpy(&b, r.p, sizeof(b)); 9581 stop_bit = upb_get_vstopbit(b); 9582 b = (b & 0x7f7f7f7f7f7f7f7fULL) & (stop_bit - 1); 9583 b += b & 0x007f007f007f007fULL; 9584 b += 3 * (b & 0x0000ffff0000ffffULL); 9585 b += 15 * (b & 0x00000000ffffffffULL); 9586 if (stop_bit == 0) { 9587 /* Error: unterminated varint. */ 9588 upb_decoderet err_r = {(void*)0, 0}; 9589 return err_r; 9590 } 9591 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), 9592 r.val | (b << 7)); 9593 return my_r; 9594 } 9595 9596 /* A branchless decoder. Credit to Daniel Wright for the bit-twiddling. */ 9597 upb_decoderet upb_vdecode_max8_wright(upb_decoderet r) { 9598 uint64_t b; 9599 uint64_t stop_bit; 9600 upb_decoderet my_r; 9601 memcpy(&b, r.p, sizeof(b)); 9602 stop_bit = upb_get_vstopbit(b); 9603 b &= (stop_bit - 1); 9604 b = ((b & 0x7f007f007f007f00ULL) >> 1) | (b & 0x007f007f007f007fULL); 9605 b = ((b & 0xffff0000ffff0000ULL) >> 2) | (b & 0x0000ffff0000ffffULL); 9606 b = ((b & 0xffffffff00000000ULL) >> 4) | (b & 0x00000000ffffffffULL); 9607 if (stop_bit == 0) { 9608 /* Error: unterminated varint. 
*/ 9609 upb_decoderet err_r = {(void*)0, 0}; 9610 return err_r; 9611 } 9612 my_r = upb_decoderet_make(r.p + ((__builtin_ctzll(stop_bit) + 1) / 8), 9613 r.val | (b << 14)); 9614 return my_r; 9615 } 9616 9617 #line 1 "upb/json/parser.rl" 9618 /* 9619 ** upb::json::Parser (upb_json_parser) 9620 ** 9621 ** A parser that uses the Ragel State Machine Compiler to generate 9622 ** the finite automata. 9623 ** 9624 ** Ragel only natively handles regular languages, but we can manually 9625 ** program it a bit to handle context-free languages like JSON, by using 9626 ** the "fcall" and "fret" constructs. 9627 ** 9628 ** This parser can handle the basics, but needs several things to be fleshed 9629 ** out: 9630 ** 9631 ** - handling of unicode escape sequences (including high surrogate pairs). 9632 ** - properly check and report errors for unknown fields, stack overflow, 9633 ** improper array nesting (or lack of nesting). 9634 ** - handling of base64 sequences with padding characters. 9635 ** - handling of push-back (non-success returns from sink functions). 9636 ** - handling of keys/escape-sequences/etc that span input buffers. 9637 */ 9638 9639 #include <stdio.h> 9640 #include <stdint.h> 9641 #include <assert.h> 9642 #include <string.h> 9643 #include <stdlib.h> 9644 #include <errno.h> 9645 9646 9647 #define UPB_JSON_MAX_DEPTH 64 9648 9649 typedef struct { 9650 upb_sink sink; 9651 9652 /* The current message in which we're parsing, and the field whose value we're 9653 * expecting next. */ 9654 const upb_msgdef *m; 9655 const upb_fielddef *f; 9656 9657 /* We are in a repeated-field context, ready to emit mapentries as 9658 * submessages. This flag alters the start-of-object (open-brace) behavior to 9659 * begin a sequence of mapentry messages rather than a single submessage. */ 9660 bool is_map; 9661 9662 /* We are in a map-entry message context. 
This flag is set when parsing the 9663 * value field of a single map entry and indicates to all value-field parsers 9664 * (subobjects, strings, numbers, and bools) that the map-entry submessage 9665 * should end as soon as the value is parsed. */ 9666 bool is_mapentry; 9667 9668 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent 9669 * message's map field that we're currently parsing. This differs from |f| 9670 * because |f| is the field in the *current* message (i.e., the map-entry 9671 * message itself), not the parent's field that leads to this map. */ 9672 const upb_fielddef *mapfield; 9673 } upb_jsonparser_frame; 9674 9675 struct upb_json_parser { 9676 upb_env *env; 9677 upb_byteshandler input_handler_; 9678 upb_bytessink input_; 9679 9680 /* Stack to track the JSON scopes we are in. */ 9681 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; 9682 upb_jsonparser_frame *top; 9683 upb_jsonparser_frame *limit; 9684 9685 upb_status status; 9686 9687 /* Ragel's internal parsing stack for the parsing state machine. */ 9688 int current_state; 9689 int parser_stack[UPB_JSON_MAX_DEPTH]; 9690 int parser_top; 9691 9692 /* The handle for the current buffer. */ 9693 const upb_bufhandle *handle; 9694 9695 /* Accumulate buffer. See details in parser.rl. */ 9696 const char *accumulated; 9697 size_t accumulated_len; 9698 char *accumulate_buf; 9699 size_t accumulate_buf_size; 9700 9701 /* Multi-part text data. See details in parser.rl. */ 9702 int multipart_state; 9703 upb_selector_t string_selector; 9704 9705 /* Input capture. See details in parser.rl. */ 9706 const char *capture; 9707 9708 /* Intermediate result of parsing a unicode escape sequence. */ 9709 uint32_t digit; 9710 }; 9711 9712 #define PARSER_CHECK_RETURN(x) if (!(x)) return false 9713 9714 /* Used to signal that a capture has been suspended. 
*/ 9715 static char suspend_capture; 9716 9717 static upb_selector_t getsel_for_handlertype(upb_json_parser *p, 9718 upb_handlertype_t type) { 9719 upb_selector_t sel; 9720 bool ok = upb_handlers_getselector(p->top->f, type, &sel); 9721 UPB_ASSERT_VAR(ok, ok); 9722 return sel; 9723 } 9724 9725 static upb_selector_t parser_getsel(upb_json_parser *p) { 9726 return getsel_for_handlertype( 9727 p, upb_handlers_getprimitivehandlertype(p->top->f)); 9728 } 9729 9730 static bool check_stack(upb_json_parser *p) { 9731 if ((p->top + 1) == p->limit) { 9732 upb_status_seterrmsg(&p->status, "Nesting too deep"); 9733 upb_env_reporterror(p->env, &p->status); 9734 return false; 9735 } 9736 9737 return true; 9738 } 9739 9740 /* There are GCC/Clang built-ins for overflow checking which we could start 9741 * using if there was any performance benefit to it. */ 9742 9743 static bool checked_add(size_t a, size_t b, size_t *c) { 9744 if (SIZE_MAX - a < b) return false; 9745 *c = a + b; 9746 return true; 9747 } 9748 9749 static size_t saturating_multiply(size_t a, size_t b) { 9750 /* size_t is unsigned, so this is defined behavior even on overflow. */ 9751 size_t ret = a * b; 9752 if (b != 0 && ret / b != a) { 9753 ret = SIZE_MAX; 9754 } 9755 return ret; 9756 } 9757 9758 9759 /* Base64 decoding ************************************************************/ 9760 9761 /* TODO(haberman): make this streaming. 
*/ 9762 9763 static const signed char b64table[] = { 9764 -1, -1, -1, -1, -1, -1, -1, -1, 9765 -1, -1, -1, -1, -1, -1, -1, -1, 9766 -1, -1, -1, -1, -1, -1, -1, -1, 9767 -1, -1, -1, -1, -1, -1, -1, -1, 9768 -1, -1, -1, -1, -1, -1, -1, -1, 9769 -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, 9770 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, 9771 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, 9772 -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, 9773 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, 9774 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, 9775 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, 9776 -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, 9777 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, 9778 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, 9779 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, 9780 -1, -1, -1, -1, -1, -1, -1, -1, 9781 -1, -1, -1, -1, -1, -1, -1, -1, 9782 -1, -1, -1, -1, -1, -1, -1, -1, 9783 -1, -1, -1, -1, -1, -1, -1, -1, 9784 -1, -1, -1, -1, -1, -1, -1, -1, 9785 -1, -1, -1, -1, -1, -1, -1, -1, 9786 -1, -1, -1, -1, -1, -1, -1, -1, 9787 -1, -1, -1, -1, -1, -1, -1, -1, 9788 -1, -1, -1, -1, -1, -1, -1, -1, 9789 -1, -1, -1, -1, -1, -1, -1, -1, 9790 -1, -1, -1, -1, -1, -1, -1, -1, 9791 -1, -1, -1, -1, -1, -1, -1, -1, 9792 -1, -1, -1, -1, -1, -1, -1, -1, 9793 -1, -1, -1, -1, -1, -1, -1, -1, 9794 -1, -1, -1, -1, -1, -1, -1, -1, 9795 -1, -1, -1, -1, -1, -1, -1, -1 9796 }; 9797 9798 /* Returns the table value sign-extended to 32 bits. Knowing that the upper 9799 * bits will be 1 for unrecognized characters makes it easier to check for 9800 * this error condition later (see below). */ 9801 int32_t b64lookup(unsigned char ch) { return b64table[ch]; } 9802 9803 /* Returns true if the given character is not a valid base64 character or 9804 * padding. 
*/ 9805 bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } 9806 9807 static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, 9808 size_t len) { 9809 const char *limit = ptr + len; 9810 for (; ptr < limit; ptr += 4) { 9811 uint32_t val; 9812 char output[3]; 9813 9814 if (limit - ptr < 4) { 9815 upb_status_seterrf(&p->status, 9816 "Base64 input for bytes field not a multiple of 4: %s", 9817 upb_fielddef_name(p->top->f)); 9818 upb_env_reporterror(p->env, &p->status); 9819 return false; 9820 } 9821 9822 val = b64lookup(ptr[0]) << 18 | 9823 b64lookup(ptr[1]) << 12 | 9824 b64lookup(ptr[2]) << 6 | 9825 b64lookup(ptr[3]); 9826 9827 /* Test the upper bit; returns true if any of the characters returned -1. */ 9828 if (val & 0x80000000) { 9829 goto otherchar; 9830 } 9831 9832 output[0] = val >> 16; 9833 output[1] = (val >> 8) & 0xff; 9834 output[2] = val & 0xff; 9835 upb_sink_putstring(&p->top->sink, sel, output, 3, NULL); 9836 } 9837 return true; 9838 9839 otherchar: 9840 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || 9841 nonbase64(ptr[3]) ) { 9842 upb_status_seterrf(&p->status, 9843 "Non-base64 characters in bytes field: %s", 9844 upb_fielddef_name(p->top->f)); 9845 upb_env_reporterror(p->env, &p->status); 9846 return false; 9847 } if (ptr[2] == '=') { 9848 uint32_t val; 9849 char output; 9850 9851 /* Last group contains only two input bytes, one output byte. */ 9852 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { 9853 goto badpadding; 9854 } 9855 9856 val = b64lookup(ptr[0]) << 18 | 9857 b64lookup(ptr[1]) << 12; 9858 9859 assert(!(val & 0x80000000)); 9860 output = val >> 16; 9861 upb_sink_putstring(&p->top->sink, sel, &output, 1, NULL); 9862 return true; 9863 } else { 9864 uint32_t val; 9865 char output[2]; 9866 9867 /* Last group contains only three input bytes, two output bytes. 
*/ 9868 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { 9869 goto badpadding; 9870 } 9871 9872 val = b64lookup(ptr[0]) << 18 | 9873 b64lookup(ptr[1]) << 12 | 9874 b64lookup(ptr[2]) << 6; 9875 9876 output[0] = val >> 16; 9877 output[1] = (val >> 8) & 0xff; 9878 upb_sink_putstring(&p->top->sink, sel, output, 2, NULL); 9879 return true; 9880 } 9881 9882 badpadding: 9883 upb_status_seterrf(&p->status, 9884 "Incorrect base64 padding for field: %s (%.*s)", 9885 upb_fielddef_name(p->top->f), 9886 4, ptr); 9887 upb_env_reporterror(p->env, &p->status); 9888 return false; 9889 } 9890 9891 9892 /* Accumulate buffer **********************************************************/ 9893 9894 /* Functionality for accumulating a buffer. 9895 * 9896 * Some parts of the parser need an entire value as a contiguous string. For 9897 * example, to look up a member name in a hash table, or to turn a string into 9898 * a number, the relevant library routines need the input string to be in 9899 * contiguous memory, even if the value spanned two or more buffers in the 9900 * input. These routines handle that. 9901 * 9902 * In the common case we can just point to the input buffer to get this 9903 * contiguous string and avoid any actual copy. So we optimistically begin 9904 * this way. But there are a few cases where we must instead copy into a 9905 * separate buffer: 9906 * 9907 * 1. The string was not contiguous in the input (it spanned buffers). 9908 * 9909 * 2. The string included escape sequences that need to be interpreted to get 9910 * the true value in a contiguous buffer. */ 9911 9912 static void assert_accumulate_empty(upb_json_parser *p) { 9913 UPB_UNUSED(p); 9914 assert(p->accumulated == NULL); 9915 assert(p->accumulated_len == 0); 9916 } 9917 9918 static void accumulate_clear(upb_json_parser *p) { 9919 p->accumulated = NULL; 9920 p->accumulated_len = 0; 9921 } 9922 9923 /* Used internally by accumulate_append(). 
*/ 9924 static bool accumulate_realloc(upb_json_parser *p, size_t need) { 9925 void *mem; 9926 size_t old_size = p->accumulate_buf_size; 9927 size_t new_size = UPB_MAX(old_size, 128); 9928 while (new_size < need) { 9929 new_size = saturating_multiply(new_size, 2); 9930 } 9931 9932 mem = upb_env_realloc(p->env, p->accumulate_buf, old_size, new_size); 9933 if (!mem) { 9934 upb_status_seterrmsg(&p->status, "Out of memory allocating buffer."); 9935 upb_env_reporterror(p->env, &p->status); 9936 return false; 9937 } 9938 9939 p->accumulate_buf = mem; 9940 p->accumulate_buf_size = new_size; 9941 return true; 9942 } 9943 9944 /* Logically appends the given data to the append buffer. 9945 * If "can_alias" is true, we will try to avoid actually copying, but the buffer 9946 * must be valid until the next accumulate_append() call (if any). */ 9947 static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, 9948 bool can_alias) { 9949 size_t need; 9950 9951 if (!p->accumulated && can_alias) { 9952 p->accumulated = buf; 9953 p->accumulated_len = len; 9954 return true; 9955 } 9956 9957 if (!checked_add(p->accumulated_len, len, &need)) { 9958 upb_status_seterrmsg(&p->status, "Integer overflow."); 9959 upb_env_reporterror(p->env, &p->status); 9960 return false; 9961 } 9962 9963 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { 9964 return false; 9965 } 9966 9967 if (p->accumulated != p->accumulate_buf) { 9968 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); 9969 p->accumulated = p->accumulate_buf; 9970 } 9971 9972 memcpy(p->accumulate_buf + p->accumulated_len, buf, len); 9973 p->accumulated_len += len; 9974 return true; 9975 } 9976 9977 /* Returns a pointer to the data accumulated since the last accumulate_clear() 9978 * call, and writes the length to *len. This with point either to the input 9979 * buffer or a temporary accumulate buffer. 
*/ 9980 static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { 9981 assert(p->accumulated); 9982 *len = p->accumulated_len; 9983 return p->accumulated; 9984 } 9985 9986 9987 /* Mult-part text data ********************************************************/ 9988 9989 /* When we have text data in the input, it can often come in multiple segments. 9990 * For example, there may be some raw string data followed by an escape 9991 * sequence. The two segments are processed with different logic. Also buffer 9992 * seams in the input can cause multiple segments. 9993 * 9994 * As we see segments, there are two main cases for how we want to process them: 9995 * 9996 * 1. we want to push the captured input directly to string handlers. 9997 * 9998 * 2. we need to accumulate all the parts into a contiguous buffer for further 9999 * processing (field name lookup, string->number conversion, etc). */ 10000 10001 /* This is the set of states for p->multipart_state. */ 10002 enum { 10003 /* We are not currently processing multipart data. */ 10004 MULTIPART_INACTIVE = 0, 10005 10006 /* We are processing multipart data by accumulating it into a contiguous 10007 * buffer. */ 10008 MULTIPART_ACCUMULATE = 1, 10009 10010 /* We are processing multipart data by pushing each part directly to the 10011 * current string handlers. */ 10012 MULTIPART_PUSHEAGERLY = 2 10013 }; 10014 10015 /* Start a multi-part text value where we accumulate the data for processing at 10016 * the end. */ 10017 static void multipart_startaccum(upb_json_parser *p) { 10018 assert_accumulate_empty(p); 10019 assert(p->multipart_state == MULTIPART_INACTIVE); 10020 p->multipart_state = MULTIPART_ACCUMULATE; 10021 } 10022 10023 /* Start a multi-part text value where we immediately push text data to a string 10024 * value with the given selector. 
*/ 10025 static void multipart_start(upb_json_parser *p, upb_selector_t sel) { 10026 assert_accumulate_empty(p); 10027 assert(p->multipart_state == MULTIPART_INACTIVE); 10028 p->multipart_state = MULTIPART_PUSHEAGERLY; 10029 p->string_selector = sel; 10030 } 10031 10032 static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, 10033 bool can_alias) { 10034 switch (p->multipart_state) { 10035 case MULTIPART_INACTIVE: 10036 upb_status_seterrmsg( 10037 &p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); 10038 upb_env_reporterror(p->env, &p->status); 10039 return false; 10040 10041 case MULTIPART_ACCUMULATE: 10042 if (!accumulate_append(p, buf, len, can_alias)) { 10043 return false; 10044 } 10045 break; 10046 10047 case MULTIPART_PUSHEAGERLY: { 10048 const upb_bufhandle *handle = can_alias ? p->handle : NULL; 10049 upb_sink_putstring(&p->top->sink, p->string_selector, buf, len, handle); 10050 break; 10051 } 10052 } 10053 10054 return true; 10055 } 10056 10057 /* Note: this invalidates the accumulate buffer! Call only after reading its 10058 * contents. */ 10059 static void multipart_end(upb_json_parser *p) { 10060 assert(p->multipart_state != MULTIPART_INACTIVE); 10061 p->multipart_state = MULTIPART_INACTIVE; 10062 accumulate_clear(p); 10063 } 10064 10065 10066 /* Input capture **************************************************************/ 10067 10068 /* Functionality for capturing a region of the input as text. Gracefully 10069 * handles the case where a buffer seam occurs in the middle of the captured 10070 * region. 
*/ 10071 10072 static void capture_begin(upb_json_parser *p, const char *ptr) { 10073 assert(p->multipart_state != MULTIPART_INACTIVE); 10074 assert(p->capture == NULL); 10075 p->capture = ptr; 10076 } 10077 10078 static bool capture_end(upb_json_parser *p, const char *ptr) { 10079 assert(p->capture); 10080 if (multipart_text(p, p->capture, ptr - p->capture, true)) { 10081 p->capture = NULL; 10082 return true; 10083 } else { 10084 return false; 10085 } 10086 } 10087 10088 /* This is called at the end of each input buffer (ie. when we have hit a 10089 * buffer seam). If we are in the middle of capturing the input, this 10090 * processes the unprocessed capture region. */ 10091 static void capture_suspend(upb_json_parser *p, const char **ptr) { 10092 if (!p->capture) return; 10093 10094 if (multipart_text(p, p->capture, *ptr - p->capture, false)) { 10095 /* We use this as a signal that we were in the middle of capturing, and 10096 * that capturing should resume at the beginning of the next buffer. 10097 * 10098 * We can't use *ptr here, because we have no guarantee that this pointer 10099 * will be valid when we resume (if the underlying memory is freed, then 10100 * using the pointer at all, even to compare to NULL, is likely undefined 10101 * behavior). */ 10102 p->capture = &suspend_capture; 10103 } else { 10104 /* Need to back up the pointer to the beginning of the capture, since 10105 * we were not able to actually preserve it. */ 10106 *ptr = p->capture; 10107 } 10108 } 10109 10110 static void capture_resume(upb_json_parser *p, const char *ptr) { 10111 if (p->capture) { 10112 assert(p->capture == &suspend_capture); 10113 p->capture = ptr; 10114 } 10115 } 10116 10117 10118 /* Callbacks from the parser **************************************************/ 10119 10120 /* These are the functions called directly from the parser itself. 10121 * We define these in the same order as their declarations in the parser. 
*/ 10122 10123 static char escape_char(char in) { 10124 switch (in) { 10125 case 'r': return '\r'; 10126 case 't': return '\t'; 10127 case 'n': return '\n'; 10128 case 'f': return '\f'; 10129 case 'b': return '\b'; 10130 case '/': return '/'; 10131 case '"': return '"'; 10132 case '\\': return '\\'; 10133 default: 10134 assert(0); 10135 return 'x'; 10136 } 10137 } 10138 10139 static bool escape(upb_json_parser *p, const char *ptr) { 10140 char ch = escape_char(*ptr); 10141 return multipart_text(p, &ch, 1, false); 10142 } 10143 10144 static void start_hex(upb_json_parser *p) { 10145 p->digit = 0; 10146 } 10147 10148 static void hexdigit(upb_json_parser *p, const char *ptr) { 10149 char ch = *ptr; 10150 10151 p->digit <<= 4; 10152 10153 if (ch >= '0' && ch <= '9') { 10154 p->digit += (ch - '0'); 10155 } else if (ch >= 'a' && ch <= 'f') { 10156 p->digit += ((ch - 'a') + 10); 10157 } else { 10158 assert(ch >= 'A' && ch <= 'F'); 10159 p->digit += ((ch - 'A') + 10); 10160 } 10161 } 10162 10163 static bool end_hex(upb_json_parser *p) { 10164 uint32_t codepoint = p->digit; 10165 10166 /* emit the codepoint as UTF-8. */ 10167 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ 10168 int length = 0; 10169 if (codepoint <= 0x7F) { 10170 utf8[0] = codepoint; 10171 length = 1; 10172 } else if (codepoint <= 0x07FF) { 10173 utf8[1] = (codepoint & 0x3F) | 0x80; 10174 codepoint >>= 6; 10175 utf8[0] = (codepoint & 0x1F) | 0xC0; 10176 length = 2; 10177 } else /* codepoint <= 0xFFFF */ { 10178 utf8[2] = (codepoint & 0x3F) | 0x80; 10179 codepoint >>= 6; 10180 utf8[1] = (codepoint & 0x3F) | 0x80; 10181 codepoint >>= 6; 10182 utf8[0] = (codepoint & 0x0F) | 0xE0; 10183 length = 3; 10184 } 10185 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate 10186 * we have to wait for the next escape to get the full code point). 
*/ 10187 10188 return multipart_text(p, utf8, length, false); 10189 } 10190 10191 static void start_text(upb_json_parser *p, const char *ptr) { 10192 capture_begin(p, ptr); 10193 } 10194 10195 static bool end_text(upb_json_parser *p, const char *ptr) { 10196 return capture_end(p, ptr); 10197 } 10198 10199 static void start_number(upb_json_parser *p, const char *ptr) { 10200 multipart_startaccum(p); 10201 capture_begin(p, ptr); 10202 } 10203 10204 static bool parse_number(upb_json_parser *p); 10205 10206 static bool end_number(upb_json_parser *p, const char *ptr) { 10207 if (!capture_end(p, ptr)) { 10208 return false; 10209 } 10210 10211 return parse_number(p); 10212 } 10213 10214 static bool parse_number(upb_json_parser *p) { 10215 size_t len; 10216 const char *buf; 10217 const char *myend; 10218 char *end; 10219 10220 /* strtol() and friends unfortunately do not support specifying the length of 10221 * the input string, so we need to force a copy into a NULL-terminated buffer. */ 10222 if (!multipart_text(p, "\0", 1, false)) { 10223 return false; 10224 } 10225 10226 buf = accumulate_getptr(p, &len); 10227 myend = buf + len - 1; /* One for NULL. */ 10228 10229 /* XXX: We are using strtol to parse integers, but this is wrong as even 10230 * integers can be represented as 1e6 (for example), which strtol can't 10231 * handle correctly. 10232 * 10233 * XXX: Also, we can't handle large integers properly because strto[u]ll 10234 * isn't in C89. 10235 * 10236 * XXX: Also, we don't properly check floats for overflow, since strtof 10237 * isn't in C89. 
*/ 10238 switch (upb_fielddef_type(p->top->f)) { 10239 case UPB_TYPE_ENUM: 10240 case UPB_TYPE_INT32: { 10241 long val = strtol(p->accumulated, &end, 0); 10242 if (val > INT32_MAX || val < INT32_MIN || errno == ERANGE || end != myend) 10243 goto err; 10244 else 10245 upb_sink_putint32(&p->top->sink, parser_getsel(p), val); 10246 break; 10247 } 10248 case UPB_TYPE_INT64: { 10249 long long val = strtol(p->accumulated, &end, 0); 10250 if (val > INT64_MAX || val < INT64_MIN || errno == ERANGE || end != myend) 10251 goto err; 10252 else 10253 upb_sink_putint64(&p->top->sink, parser_getsel(p), val); 10254 break; 10255 } 10256 case UPB_TYPE_UINT32: { 10257 unsigned long val = strtoul(p->accumulated, &end, 0); 10258 if (val > UINT32_MAX || errno == ERANGE || end != myend) 10259 goto err; 10260 else 10261 upb_sink_putuint32(&p->top->sink, parser_getsel(p), val); 10262 break; 10263 } 10264 case UPB_TYPE_UINT64: { 10265 unsigned long long val = strtoul(p->accumulated, &end, 0); 10266 if (val > UINT64_MAX || errno == ERANGE || end != myend) 10267 goto err; 10268 else 10269 upb_sink_putuint64(&p->top->sink, parser_getsel(p), val); 10270 break; 10271 } 10272 case UPB_TYPE_DOUBLE: { 10273 double val = strtod(p->accumulated, &end); 10274 if (errno == ERANGE || end != myend) 10275 goto err; 10276 else 10277 upb_sink_putdouble(&p->top->sink, parser_getsel(p), val); 10278 break; 10279 } 10280 case UPB_TYPE_FLOAT: { 10281 float val = strtod(p->accumulated, &end); 10282 if (errno == ERANGE || end != myend) 10283 goto err; 10284 else 10285 upb_sink_putfloat(&p->top->sink, parser_getsel(p), val); 10286 break; 10287 } 10288 default: 10289 assert(false); 10290 } 10291 10292 multipart_end(p); 10293 10294 return true; 10295 10296 err: 10297 upb_status_seterrf(&p->status, "error parsing number: %s", buf); 10298 upb_env_reporterror(p->env, &p->status); 10299 multipart_end(p); 10300 return false; 10301 } 10302 10303 static bool parser_putbool(upb_json_parser *p, bool val) { 10304 bool ok; 10305 
10306 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { 10307 upb_status_seterrf(&p->status, 10308 "Boolean value specified for non-bool field: %s", 10309 upb_fielddef_name(p->top->f)); 10310 upb_env_reporterror(p->env, &p->status); 10311 return false; 10312 } 10313 10314 ok = upb_sink_putbool(&p->top->sink, parser_getsel(p), val); 10315 UPB_ASSERT_VAR(ok, ok); 10316 10317 return true; 10318 } 10319 10320 static bool start_stringval(upb_json_parser *p) { 10321 assert(p->top->f); 10322 10323 if (upb_fielddef_isstring(p->top->f)) { 10324 upb_jsonparser_frame *inner; 10325 upb_selector_t sel; 10326 10327 if (!check_stack(p)) return false; 10328 10329 /* Start a new parser frame: parser frames correspond one-to-one with 10330 * handler frames, and string events occur in a sub-frame. */ 10331 inner = p->top + 1; 10332 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10333 upb_sink_startstr(&p->top->sink, sel, 0, &inner->sink); 10334 inner->m = p->top->m; 10335 inner->f = p->top->f; 10336 inner->is_map = false; 10337 inner->is_mapentry = false; 10338 p->top = inner; 10339 10340 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { 10341 /* For STRING fields we push data directly to the handlers as it is 10342 * parsed. We don't do this yet for BYTES fields, because our base64 10343 * decoder is not streaming. 10344 * 10345 * TODO(haberman): make base64 decoding streaming also. */ 10346 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); 10347 return true; 10348 } else { 10349 multipart_startaccum(p); 10350 return true; 10351 } 10352 } else if (upb_fielddef_type(p->top->f) == UPB_TYPE_ENUM) { 10353 /* No need to push a frame -- symbolic enum names in quotes remain in the 10354 * current parser frame. 10355 * 10356 * Enum string values must accumulate so we can look up the value in a table 10357 * once it is complete. 
*/ 10358 multipart_startaccum(p); 10359 return true; 10360 } else { 10361 upb_status_seterrf(&p->status, 10362 "String specified for non-string/non-enum field: %s", 10363 upb_fielddef_name(p->top->f)); 10364 upb_env_reporterror(p->env, &p->status); 10365 return false; 10366 } 10367 } 10368 10369 static bool end_stringval(upb_json_parser *p) { 10370 bool ok = true; 10371 10372 switch (upb_fielddef_type(p->top->f)) { 10373 case UPB_TYPE_BYTES: 10374 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), 10375 p->accumulated, p->accumulated_len)) { 10376 return false; 10377 } 10378 /* Fall through. */ 10379 10380 case UPB_TYPE_STRING: { 10381 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10382 upb_sink_endstr(&p->top->sink, sel); 10383 p->top--; 10384 break; 10385 } 10386 10387 case UPB_TYPE_ENUM: { 10388 /* Resolve enum symbolic name to integer value. */ 10389 const upb_enumdef *enumdef = 10390 (const upb_enumdef*)upb_fielddef_subdef(p->top->f); 10391 10392 size_t len; 10393 const char *buf = accumulate_getptr(p, &len); 10394 10395 int32_t int_val = 0; 10396 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); 10397 10398 if (ok) { 10399 upb_selector_t sel = parser_getsel(p); 10400 upb_sink_putint32(&p->top->sink, sel, int_val); 10401 } else { 10402 upb_status_seterrf(&p->status, "Enum value unknown: '%.*s'", len, buf); 10403 upb_env_reporterror(p->env, &p->status); 10404 } 10405 10406 break; 10407 } 10408 10409 default: 10410 assert(false); 10411 upb_status_seterrmsg(&p->status, "Internal error in JSON decoder"); 10412 upb_env_reporterror(p->env, &p->status); 10413 ok = false; 10414 break; 10415 } 10416 10417 multipart_end(p); 10418 10419 return ok; 10420 } 10421 10422 static void start_member(upb_json_parser *p) { 10423 assert(!p->top->f); 10424 multipart_startaccum(p); 10425 } 10426 10427 /* Helper: invoked during parse_mapentry() to emit the mapentry message's key 10428 * field based on the current contents of the accumulate 
buffer. */ 10429 static bool parse_mapentry_key(upb_json_parser *p) { 10430 10431 size_t len; 10432 const char *buf = accumulate_getptr(p, &len); 10433 10434 /* Emit the key field. We do a bit of ad-hoc parsing here because the 10435 * parser state machine has already decided that this is a string field 10436 * name, and we are reinterpreting it as some arbitrary key type. In 10437 * particular, integer and bool keys are quoted, so we need to parse the 10438 * quoted string contents here. */ 10439 10440 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); 10441 if (p->top->f == NULL) { 10442 upb_status_seterrmsg(&p->status, "mapentry message has no key"); 10443 upb_env_reporterror(p->env, &p->status); 10444 return false; 10445 } 10446 switch (upb_fielddef_type(p->top->f)) { 10447 case UPB_TYPE_INT32: 10448 case UPB_TYPE_INT64: 10449 case UPB_TYPE_UINT32: 10450 case UPB_TYPE_UINT64: 10451 /* Invoke end_number. The accum buffer has the number's text already. */ 10452 if (!parse_number(p)) { 10453 return false; 10454 } 10455 break; 10456 case UPB_TYPE_BOOL: 10457 if (len == 4 && !strncmp(buf, "true", 4)) { 10458 if (!parser_putbool(p, true)) { 10459 return false; 10460 } 10461 } else if (len == 5 && !strncmp(buf, "false", 5)) { 10462 if (!parser_putbool(p, false)) { 10463 return false; 10464 } 10465 } else { 10466 upb_status_seterrmsg(&p->status, 10467 "Map bool key not 'true' or 'false'"); 10468 upb_env_reporterror(p->env, &p->status); 10469 return false; 10470 } 10471 multipart_end(p); 10472 break; 10473 case UPB_TYPE_STRING: 10474 case UPB_TYPE_BYTES: { 10475 upb_sink subsink; 10476 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10477 upb_sink_startstr(&p->top->sink, sel, len, &subsink); 10478 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 10479 upb_sink_putstring(&subsink, sel, buf, len, NULL); 10480 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10481 upb_sink_endstr(&subsink, sel); 10482 multipart_end(p); 10483 break; 
10484 } 10485 default: 10486 upb_status_seterrmsg(&p->status, "Invalid field type for map key"); 10487 upb_env_reporterror(p->env, &p->status); 10488 return false; 10489 } 10490 10491 return true; 10492 } 10493 10494 /* Helper: emit one map entry (as a submessage in the map field sequence). This 10495 * is invoked from end_membername(), at the end of the map entry's key string, 10496 * with the map key in the accumulate buffer. It parses the key from that 10497 * buffer, emits the handler calls to start the mapentry submessage (setting up 10498 * its subframe in the process), and sets up state in the subframe so that the 10499 * value parser (invoked next) will emit the mapentry's value field and then 10500 * end the mapentry message. */ 10501 10502 static bool handle_mapentry(upb_json_parser *p) { 10503 const upb_fielddef *mapfield; 10504 const upb_msgdef *mapentrymsg; 10505 upb_jsonparser_frame *inner; 10506 upb_selector_t sel; 10507 10508 /* Map entry: p->top->sink is the seq frame, so we need to start a frame 10509 * for the mapentry itself, and then set |f| in that frame so that the map 10510 * value field is parsed, and also set a flag to end the frame after the 10511 * map-entry value is parsed. */ 10512 if (!check_stack(p)) return false; 10513 10514 mapfield = p->top->mapfield; 10515 mapentrymsg = upb_fielddef_msgsubdef(mapfield); 10516 10517 inner = p->top + 1; 10518 p->top->f = mapfield; 10519 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 10520 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); 10521 inner->m = mapentrymsg; 10522 inner->mapfield = mapfield; 10523 inner->is_map = false; 10524 10525 /* Don't set this to true *yet* -- we reuse parsing handlers below to push 10526 * the key field value to the sink, and these handlers will pop the frame 10527 * if they see is_mapentry (when invoked by the parser state machine, they 10528 * would have just seen the map-entry value, not key). 
*/ 10529 inner->is_mapentry = false; 10530 p->top = inner; 10531 10532 /* send STARTMSG in submsg frame. */ 10533 upb_sink_startmsg(&p->top->sink); 10534 10535 parse_mapentry_key(p); 10536 10537 /* Set up the value field to receive the map-entry value. */ 10538 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); 10539 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. */ 10540 p->top->mapfield = mapfield; 10541 if (p->top->f == NULL) { 10542 upb_status_seterrmsg(&p->status, "mapentry message has no value"); 10543 upb_env_reporterror(p->env, &p->status); 10544 return false; 10545 } 10546 10547 return true; 10548 } 10549 10550 static bool end_membername(upb_json_parser *p) { 10551 assert(!p->top->f); 10552 10553 if (p->top->is_map) { 10554 return handle_mapentry(p); 10555 } else { 10556 size_t len; 10557 const char *buf = accumulate_getptr(p, &len); 10558 const upb_fielddef *f = upb_msgdef_ntof(p->top->m, buf, len); 10559 10560 if (!f) { 10561 /* TODO(haberman): Ignore unknown fields if requested/configured to do 10562 * so. */ 10563 upb_status_seterrf(&p->status, "No such field: %.*s\n", (int)len, buf); 10564 upb_env_reporterror(p->env, &p->status); 10565 return false; 10566 } 10567 10568 p->top->f = f; 10569 multipart_end(p); 10570 10571 return true; 10572 } 10573 } 10574 10575 static void end_member(upb_json_parser *p) { 10576 /* If we just parsed a map-entry value, end that frame too. */ 10577 if (p->top->is_mapentry) { 10578 upb_status s = UPB_STATUS_INIT; 10579 upb_selector_t sel; 10580 bool ok; 10581 const upb_fielddef *mapfield; 10582 10583 assert(p->top > p->stack); 10584 /* send ENDMSG on submsg. */ 10585 upb_sink_endmsg(&p->top->sink, &s); 10586 mapfield = p->top->mapfield; 10587 10588 /* send ENDSUBMSG in repeated-field-of-mapentries frame. 
*/ 10589 p->top--; 10590 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); 10591 UPB_ASSERT_VAR(ok, ok); 10592 upb_sink_endsubmsg(&p->top->sink, sel); 10593 } 10594 10595 p->top->f = NULL; 10596 } 10597 10598 static bool start_subobject(upb_json_parser *p) { 10599 assert(p->top->f); 10600 10601 if (upb_fielddef_ismap(p->top->f)) { 10602 upb_jsonparser_frame *inner; 10603 upb_selector_t sel; 10604 10605 /* Beginning of a map. Start a new parser frame in a repeated-field 10606 * context. */ 10607 if (!check_stack(p)) return false; 10608 10609 inner = p->top + 1; 10610 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 10611 upb_sink_startseq(&p->top->sink, sel, &inner->sink); 10612 inner->m = upb_fielddef_msgsubdef(p->top->f); 10613 inner->mapfield = p->top->f; 10614 inner->f = NULL; 10615 inner->is_map = true; 10616 inner->is_mapentry = false; 10617 p->top = inner; 10618 10619 return true; 10620 } else if (upb_fielddef_issubmsg(p->top->f)) { 10621 upb_jsonparser_frame *inner; 10622 upb_selector_t sel; 10623 10624 /* Beginning of a subobject. Start a new parser frame in the submsg 10625 * context. 
*/ 10626 if (!check_stack(p)) return false; 10627 10628 inner = p->top + 1; 10629 10630 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 10631 upb_sink_startsubmsg(&p->top->sink, sel, &inner->sink); 10632 inner->m = upb_fielddef_msgsubdef(p->top->f); 10633 inner->f = NULL; 10634 inner->is_map = false; 10635 inner->is_mapentry = false; 10636 p->top = inner; 10637 10638 return true; 10639 } else { 10640 upb_status_seterrf(&p->status, 10641 "Object specified for non-message/group field: %s", 10642 upb_fielddef_name(p->top->f)); 10643 upb_env_reporterror(p->env, &p->status); 10644 return false; 10645 } 10646 } 10647 10648 static void end_subobject(upb_json_parser *p) { 10649 if (p->top->is_map) { 10650 upb_selector_t sel; 10651 p->top--; 10652 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); 10653 upb_sink_endseq(&p->top->sink, sel); 10654 } else { 10655 upb_selector_t sel; 10656 p->top--; 10657 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); 10658 upb_sink_endsubmsg(&p->top->sink, sel); 10659 } 10660 } 10661 10662 static bool start_array(upb_json_parser *p) { 10663 upb_jsonparser_frame *inner; 10664 upb_selector_t sel; 10665 10666 assert(p->top->f); 10667 10668 if (!upb_fielddef_isseq(p->top->f)) { 10669 upb_status_seterrf(&p->status, 10670 "Array specified for non-repeated field: %s", 10671 upb_fielddef_name(p->top->f)); 10672 upb_env_reporterror(p->env, &p->status); 10673 return false; 10674 } 10675 10676 if (!check_stack(p)) return false; 10677 10678 inner = p->top + 1; 10679 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 10680 upb_sink_startseq(&p->top->sink, sel, &inner->sink); 10681 inner->m = p->top->m; 10682 inner->f = p->top->f; 10683 inner->is_map = false; 10684 inner->is_mapentry = false; 10685 p->top = inner; 10686 10687 return true; 10688 } 10689 10690 static void end_array(upb_json_parser *p) { 10691 upb_selector_t sel; 10692 10693 assert(p->top > p->stack); 10694 10695 p->top--; 10696 sel = getsel_for_handlertype(p, 
UPB_HANDLER_ENDSEQ); 10697 upb_sink_endseq(&p->top->sink, sel); 10698 } 10699 10700 static void start_object(upb_json_parser *p) { 10701 if (!p->top->is_map) { 10702 upb_sink_startmsg(&p->top->sink); 10703 } 10704 } 10705 10706 static void end_object(upb_json_parser *p) { 10707 if (!p->top->is_map) { 10708 upb_status status; 10709 upb_status_clear(&status); 10710 upb_sink_endmsg(&p->top->sink, &status); 10711 if (!upb_ok(&status)) { 10712 upb_env_reporterror(p->env, &status); 10713 } 10714 } 10715 } 10716 10717 10718 #define CHECK_RETURN_TOP(x) if (!(x)) goto error 10719 10720 10721 /* The actual parser **********************************************************/ 10722 10723 /* What follows is the Ragel parser itself. The language is specified in Ragel 10724 * and the actions call our C functions above. 10725 * 10726 * Ragel has an extensive set of functionality, and we use only a small part of 10727 * it. There are many action types but we only use a few: 10728 * 10729 * ">" -- transition into a machine 10730 * "%" -- transition out of a machine 10731 * "@" -- transition into a final state of a machine. 10732 * 10733 * "@" transitions are tricky because a machine can transition into a final 10734 * state repeatedly. But in some cases we know this can't happen, for example 10735 * a string which is delimited by a final '"' can only transition into its 10736 * final state once, when the closing '"' is seen. 
*/ 10737 10738 10739 #line 1218 "upb/json/parser.rl" 10740 10741 10742 10743 #line 1130 "upb/json/parser.c" 10744 static const char _json_actions[] = { 10745 0, 1, 0, 1, 2, 1, 3, 1, 10746 5, 1, 6, 1, 7, 1, 8, 1, 10747 10, 1, 12, 1, 13, 1, 14, 1, 10748 15, 1, 16, 1, 17, 1, 21, 1, 10749 25, 1, 27, 2, 3, 8, 2, 4, 10750 5, 2, 6, 2, 2, 6, 8, 2, 10751 11, 9, 2, 13, 15, 2, 14, 15, 10752 2, 18, 1, 2, 19, 27, 2, 20, 10753 9, 2, 22, 27, 2, 23, 27, 2, 10754 24, 27, 2, 26, 27, 3, 14, 11, 10755 9 10756 }; 10757 10758 static const unsigned char _json_key_offsets[] = { 10759 0, 0, 4, 9, 14, 15, 19, 24, 10760 29, 34, 38, 42, 45, 48, 50, 54, 10761 58, 60, 62, 67, 69, 71, 80, 86, 10762 92, 98, 104, 106, 115, 116, 116, 116, 10763 121, 126, 131, 132, 133, 134, 135, 135, 10764 136, 137, 138, 138, 139, 140, 141, 141, 10765 146, 151, 152, 156, 161, 166, 171, 175, 10766 175, 178, 178, 178 10767 }; 10768 10769 static const char _json_trans_keys[] = { 10770 32, 123, 9, 13, 32, 34, 125, 9, 10771 13, 32, 34, 125, 9, 13, 34, 32, 10772 58, 9, 13, 32, 93, 125, 9, 13, 10773 32, 44, 125, 9, 13, 32, 44, 125, 10774 9, 13, 32, 34, 9, 13, 45, 48, 10775 49, 57, 48, 49, 57, 46, 69, 101, 10776 48, 57, 69, 101, 48, 57, 43, 45, 10777 48, 57, 48, 57, 48, 57, 46, 69, 10778 101, 48, 57, 34, 92, 34, 92, 34, 10779 47, 92, 98, 102, 110, 114, 116, 117, 10780 48, 57, 65, 70, 97, 102, 48, 57, 10781 65, 70, 97, 102, 48, 57, 65, 70, 10782 97, 102, 48, 57, 65, 70, 97, 102, 10783 34, 92, 34, 45, 91, 102, 110, 116, 10784 123, 48, 57, 34, 32, 93, 125, 9, 10785 13, 32, 44, 93, 9, 13, 32, 93, 10786 125, 9, 13, 97, 108, 115, 101, 117, 10787 108, 108, 114, 117, 101, 32, 34, 125, 10788 9, 13, 32, 34, 125, 9, 13, 34, 10789 32, 58, 9, 13, 32, 93, 125, 9, 10790 13, 32, 44, 125, 9, 13, 32, 44, 10791 125, 9, 13, 32, 34, 9, 13, 32, 10792 9, 13, 0 10793 }; 10794 10795 static const char _json_single_lengths[] = { 10796 0, 2, 3, 3, 1, 2, 3, 3, 10797 3, 2, 2, 1, 3, 0, 2, 2, 10798 0, 0, 3, 2, 2, 9, 0, 0, 10799 0, 0, 2, 7, 1, 0, 0, 3, 
10800 3, 3, 1, 1, 1, 1, 0, 1, 10801 1, 1, 0, 1, 1, 1, 0, 3, 10802 3, 1, 2, 3, 3, 3, 2, 0, 10803 1, 0, 0, 0 10804 }; 10805 10806 static const char _json_range_lengths[] = { 10807 0, 1, 1, 1, 0, 1, 1, 1, 10808 1, 1, 1, 1, 0, 1, 1, 1, 10809 1, 1, 1, 0, 0, 0, 3, 3, 10810 3, 3, 0, 1, 0, 0, 0, 1, 10811 1, 1, 0, 0, 0, 0, 0, 0, 10812 0, 0, 0, 0, 0, 0, 0, 1, 10813 1, 0, 1, 1, 1, 1, 1, 0, 10814 1, 0, 0, 0 10815 }; 10816 10817 static const short _json_index_offsets[] = { 10818 0, 0, 4, 9, 14, 16, 20, 25, 10819 30, 35, 39, 43, 46, 50, 52, 56, 10820 60, 62, 64, 69, 72, 75, 85, 89, 10821 93, 97, 101, 104, 113, 115, 116, 117, 10822 122, 127, 132, 134, 136, 138, 140, 141, 10823 143, 145, 147, 148, 150, 152, 154, 155, 10824 160, 165, 167, 171, 176, 181, 186, 190, 10825 191, 194, 195, 196 10826 }; 10827 10828 static const char _json_indicies[] = { 10829 0, 2, 0, 1, 3, 4, 5, 3, 10830 1, 6, 7, 8, 6, 1, 9, 1, 10831 10, 11, 10, 1, 11, 1, 1, 11, 10832 12, 13, 14, 15, 13, 1, 16, 17, 10833 8, 16, 1, 17, 7, 17, 1, 18, 10834 19, 20, 1, 19, 20, 1, 22, 23, 10835 23, 21, 24, 1, 23, 23, 24, 21, 10836 25, 25, 26, 1, 26, 1, 26, 21, 10837 22, 23, 23, 20, 21, 28, 29, 27, 10838 31, 32, 30, 33, 33, 33, 33, 33, 10839 33, 33, 33, 34, 1, 35, 35, 35, 10840 1, 36, 36, 36, 1, 37, 37, 37, 10841 1, 38, 38, 38, 1, 40, 41, 39, 10842 42, 43, 44, 45, 46, 47, 48, 43, 10843 1, 49, 1, 50, 51, 53, 54, 1, 10844 53, 52, 55, 56, 54, 55, 1, 56, 10845 1, 1, 56, 52, 57, 1, 58, 1, 10846 59, 1, 60, 1, 61, 62, 1, 63, 10847 1, 64, 1, 65, 66, 1, 67, 1, 10848 68, 1, 69, 70, 71, 72, 70, 1, 10849 73, 74, 75, 73, 1, 76, 1, 77, 10850 78, 77, 1, 78, 1, 1, 78, 79, 10851 80, 81, 82, 80, 1, 83, 84, 75, 10852 83, 1, 84, 74, 84, 1, 85, 86, 10853 86, 1, 1, 1, 1, 0 10854 }; 10855 10856 static const char _json_trans_targs[] = { 10857 1, 0, 2, 3, 4, 56, 3, 4, 10858 56, 5, 5, 6, 7, 8, 9, 56, 10859 8, 9, 11, 12, 18, 57, 13, 15, 10860 14, 16, 17, 20, 58, 21, 20, 58, 10861 21, 19, 22, 23, 24, 25, 26, 20, 10862 58, 21, 28, 30, 31, 34, 39, 43, 
10863 47, 29, 59, 59, 32, 31, 29, 32, 10864 33, 35, 36, 37, 38, 59, 40, 41, 10865 42, 59, 44, 45, 46, 59, 48, 49, 10866 55, 48, 49, 55, 50, 50, 51, 52, 10867 53, 54, 55, 53, 54, 59, 56 10868 }; 10869 10870 static const char _json_trans_actions[] = { 10871 0, 0, 0, 21, 77, 53, 0, 47, 10872 23, 17, 0, 0, 15, 19, 19, 50, 10873 0, 0, 0, 0, 0, 1, 0, 0, 10874 0, 0, 0, 3, 13, 0, 0, 35, 10875 5, 11, 0, 38, 7, 7, 7, 41, 10876 44, 9, 62, 56, 25, 0, 0, 0, 10877 31, 29, 33, 59, 15, 0, 27, 0, 10878 0, 0, 0, 0, 0, 68, 0, 0, 10879 0, 71, 0, 0, 0, 65, 21, 77, 10880 53, 0, 47, 23, 17, 0, 0, 15, 10881 19, 19, 50, 0, 0, 74, 0 10882 }; 10883 10884 static const int json_start = 1; 10885 10886 static const int json_en_number_machine = 10; 10887 static const int json_en_string_machine = 19; 10888 static const int json_en_value_machine = 27; 10889 static const int json_en_main = 1; 10890 10891 10892 #line 1221 "upb/json/parser.rl" 10893 10894 size_t parse(void *closure, const void *hd, const char *buf, size_t size, 10895 const upb_bufhandle *handle) { 10896 upb_json_parser *parser = closure; 10897 10898 /* Variables used by Ragel's generated code. 
*/ 10899 int cs = parser->current_state; 10900 int *stack = parser->parser_stack; 10901 int top = parser->parser_top; 10902 10903 const char *p = buf; 10904 const char *pe = buf + size; 10905 10906 parser->handle = handle; 10907 10908 UPB_UNUSED(hd); 10909 UPB_UNUSED(handle); 10910 10911 capture_resume(parser, buf); 10912 10913 10914 #line 1301 "upb/json/parser.c" 10915 { 10916 int _klen; 10917 unsigned int _trans; 10918 const char *_acts; 10919 unsigned int _nacts; 10920 const char *_keys; 10921 10922 if ( p == pe ) 10923 goto _test_eof; 10924 if ( cs == 0 ) 10925 goto _out; 10926 _resume: 10927 _keys = _json_trans_keys + _json_key_offsets[cs]; 10928 _trans = _json_index_offsets[cs]; 10929 10930 _klen = _json_single_lengths[cs]; 10931 if ( _klen > 0 ) { 10932 const char *_lower = _keys; 10933 const char *_mid; 10934 const char *_upper = _keys + _klen - 1; 10935 while (1) { 10936 if ( _upper < _lower ) 10937 break; 10938 10939 _mid = _lower + ((_upper-_lower) >> 1); 10940 if ( (*p) < *_mid ) 10941 _upper = _mid - 1; 10942 else if ( (*p) > *_mid ) 10943 _lower = _mid + 1; 10944 else { 10945 _trans += (unsigned int)(_mid - _keys); 10946 goto _match; 10947 } 10948 } 10949 _keys += _klen; 10950 _trans += _klen; 10951 } 10952 10953 _klen = _json_range_lengths[cs]; 10954 if ( _klen > 0 ) { 10955 const char *_lower = _keys; 10956 const char *_mid; 10957 const char *_upper = _keys + (_klen<<1) - 2; 10958 while (1) { 10959 if ( _upper < _lower ) 10960 break; 10961 10962 _mid = _lower + (((_upper-_lower) >> 1) & ~1); 10963 if ( (*p) < _mid[0] ) 10964 _upper = _mid - 2; 10965 else if ( (*p) > _mid[1] ) 10966 _lower = _mid + 2; 10967 else { 10968 _trans += (unsigned int)((_mid - _keys)>>1); 10969 goto _match; 10970 } 10971 } 10972 _trans += _klen; 10973 } 10974 10975 _match: 10976 _trans = _json_indicies[_trans]; 10977 cs = _json_trans_targs[_trans]; 10978 10979 if ( _json_trans_actions[_trans] == 0 ) 10980 goto _again; 10981 10982 _acts = _json_actions + 
_json_trans_actions[_trans]; 10983 _nacts = (unsigned int) *_acts++; 10984 while ( _nacts-- > 0 ) 10985 { 10986 switch ( *_acts++ ) 10987 { 10988 case 0: 10989 #line 1133 "upb/json/parser.rl" 10990 { p--; {cs = stack[--top]; goto _again;} } 10991 break; 10992 case 1: 10993 #line 1134 "upb/json/parser.rl" 10994 { p--; {stack[top++] = cs; cs = 10; goto _again;} } 10995 break; 10996 case 2: 10997 #line 1138 "upb/json/parser.rl" 10998 { start_text(parser, p); } 10999 break; 11000 case 3: 11001 #line 1139 "upb/json/parser.rl" 11002 { CHECK_RETURN_TOP(end_text(parser, p)); } 11003 break; 11004 case 4: 11005 #line 1145 "upb/json/parser.rl" 11006 { start_hex(parser); } 11007 break; 11008 case 5: 11009 #line 1146 "upb/json/parser.rl" 11010 { hexdigit(parser, p); } 11011 break; 11012 case 6: 11013 #line 1147 "upb/json/parser.rl" 11014 { CHECK_RETURN_TOP(end_hex(parser)); } 11015 break; 11016 case 7: 11017 #line 1153 "upb/json/parser.rl" 11018 { CHECK_RETURN_TOP(escape(parser, p)); } 11019 break; 11020 case 8: 11021 #line 1159 "upb/json/parser.rl" 11022 { p--; {cs = stack[--top]; goto _again;} } 11023 break; 11024 case 9: 11025 #line 1162 "upb/json/parser.rl" 11026 { {stack[top++] = cs; cs = 19; goto _again;} } 11027 break; 11028 case 10: 11029 #line 1164 "upb/json/parser.rl" 11030 { p--; {stack[top++] = cs; cs = 27; goto _again;} } 11031 break; 11032 case 11: 11033 #line 1169 "upb/json/parser.rl" 11034 { start_member(parser); } 11035 break; 11036 case 12: 11037 #line 1170 "upb/json/parser.rl" 11038 { CHECK_RETURN_TOP(end_membername(parser)); } 11039 break; 11040 case 13: 11041 #line 1173 "upb/json/parser.rl" 11042 { end_member(parser); } 11043 break; 11044 case 14: 11045 #line 1179 "upb/json/parser.rl" 11046 { start_object(parser); } 11047 break; 11048 case 15: 11049 #line 1182 "upb/json/parser.rl" 11050 { end_object(parser); } 11051 break; 11052 case 16: 11053 #line 1188 "upb/json/parser.rl" 11054 { CHECK_RETURN_TOP(start_array(parser)); } 11055 break; 11056 case 17: 11057 
#line 1192 "upb/json/parser.rl" 11058 { end_array(parser); } 11059 break; 11060 case 18: 11061 #line 1197 "upb/json/parser.rl" 11062 { start_number(parser, p); } 11063 break; 11064 case 19: 11065 #line 1198 "upb/json/parser.rl" 11066 { CHECK_RETURN_TOP(end_number(parser, p)); } 11067 break; 11068 case 20: 11069 #line 1200 "upb/json/parser.rl" 11070 { CHECK_RETURN_TOP(start_stringval(parser)); } 11071 break; 11072 case 21: 11073 #line 1201 "upb/json/parser.rl" 11074 { CHECK_RETURN_TOP(end_stringval(parser)); } 11075 break; 11076 case 22: 11077 #line 1203 "upb/json/parser.rl" 11078 { CHECK_RETURN_TOP(parser_putbool(parser, true)); } 11079 break; 11080 case 23: 11081 #line 1205 "upb/json/parser.rl" 11082 { CHECK_RETURN_TOP(parser_putbool(parser, false)); } 11083 break; 11084 case 24: 11085 #line 1207 "upb/json/parser.rl" 11086 { /* null value */ } 11087 break; 11088 case 25: 11089 #line 1209 "upb/json/parser.rl" 11090 { CHECK_RETURN_TOP(start_subobject(parser)); } 11091 break; 11092 case 26: 11093 #line 1210 "upb/json/parser.rl" 11094 { end_subobject(parser); } 11095 break; 11096 case 27: 11097 #line 1215 "upb/json/parser.rl" 11098 { p--; {cs = stack[--top]; goto _again;} } 11099 break; 11100 #line 1487 "upb/json/parser.c" 11101 } 11102 } 11103 11104 _again: 11105 if ( cs == 0 ) 11106 goto _out; 11107 if ( ++p != pe ) 11108 goto _resume; 11109 _test_eof: {} 11110 _out: {} 11111 } 11112 11113 #line 1242 "upb/json/parser.rl" 11114 11115 if (p != pe) { 11116 upb_status_seterrf(&parser->status, "Parse error at %s\n", p); 11117 upb_env_reporterror(parser->env, &parser->status); 11118 } else { 11119 capture_suspend(parser, &p); 11120 } 11121 11122 error: 11123 /* Save parsing state back to parser. */ 11124 parser->current_state = cs; 11125 parser->parser_top = top; 11126 11127 return p - buf; 11128 } 11129 11130 bool end(void *closure, const void *hd) { 11131 UPB_UNUSED(closure); 11132 UPB_UNUSED(hd); 11133 11134 /* Prevent compile warning on unused static constants. 
*/ 11135 UPB_UNUSED(json_start); 11136 UPB_UNUSED(json_en_number_machine); 11137 UPB_UNUSED(json_en_string_machine); 11138 UPB_UNUSED(json_en_value_machine); 11139 UPB_UNUSED(json_en_main); 11140 return true; 11141 } 11142 11143 static void json_parser_reset(upb_json_parser *p) { 11144 int cs; 11145 int top; 11146 11147 p->top = p->stack; 11148 p->top->f = NULL; 11149 p->top->is_map = false; 11150 p->top->is_mapentry = false; 11151 11152 /* Emit Ragel initialization of the parser. */ 11153 11154 #line 1541 "upb/json/parser.c" 11155 { 11156 cs = json_start; 11157 top = 0; 11158 } 11159 11160 #line 1282 "upb/json/parser.rl" 11161 p->current_state = cs; 11162 p->parser_top = top; 11163 accumulate_clear(p); 11164 p->multipart_state = MULTIPART_INACTIVE; 11165 p->capture = NULL; 11166 p->accumulated = NULL; 11167 upb_status_clear(&p->status); 11168 } 11169 11170 11171 /* Public API *****************************************************************/ 11172 11173 upb_json_parser *upb_json_parser_create(upb_env *env, upb_sink *output) { 11174 #ifndef NDEBUG 11175 const size_t size_before = upb_env_bytesallocated(env); 11176 #endif 11177 upb_json_parser *p = upb_env_malloc(env, sizeof(upb_json_parser)); 11178 if (!p) return false; 11179 11180 p->env = env; 11181 p->limit = p->stack + UPB_JSON_MAX_DEPTH; 11182 p->accumulate_buf = NULL; 11183 p->accumulate_buf_size = 0; 11184 upb_byteshandler_init(&p->input_handler_); 11185 upb_byteshandler_setstring(&p->input_handler_, parse, NULL); 11186 upb_byteshandler_setendstr(&p->input_handler_, end, NULL); 11187 upb_bytessink_reset(&p->input_, &p->input_handler_, p); 11188 11189 json_parser_reset(p); 11190 upb_sink_reset(&p->top->sink, output->handlers, output->closure); 11191 p->top->m = upb_handlers_msgdef(output->handlers); 11192 11193 /* If this fails, uncomment and increase the value in parser.h. 
*/ 11194 /* fprintf(stderr, "%zd\n", upb_env_bytesallocated(env) - size_before); */ 11195 assert(upb_env_bytesallocated(env) - size_before <= UPB_JSON_PARSER_SIZE); 11196 return p; 11197 } 11198 11199 upb_bytessink *upb_json_parser_input(upb_json_parser *p) { 11200 return &p->input_; 11201 } 11202 /* 11203 ** This currently uses snprintf() to format primitives, and could be optimized 11204 ** further. 11205 */ 11206 11207 11208 #include <stdlib.h> 11209 #include <stdio.h> 11210 #include <string.h> 11211 #include <stdint.h> 11212 11213 struct upb_json_printer { 11214 upb_sink input_; 11215 /* BytesSink closure. */ 11216 void *subc_; 11217 upb_bytessink *output_; 11218 11219 /* We track the depth so that we know when to emit startstr/endstr on the 11220 * output. */ 11221 int depth_; 11222 11223 /* Have we emitted the first element? This state is necessary to emit commas 11224 * without leaving a trailing comma in arrays/maps. We keep this state per 11225 * frame depth. 11226 * 11227 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. 11228 * We count frames (contexts in which we separate elements by commas) as both 11229 * repeated fields and messages (maps), and the worst case is a 11230 * message->repeated field->submessage->repeated field->... nesting. */ 11231 bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; 11232 }; 11233 11234 /* StringPiece; a pointer plus a length. 
*/ 11235 typedef struct { 11236 const char *ptr; 11237 size_t len; 11238 } strpc; 11239 11240 strpc *newstrpc(upb_handlers *h, const upb_fielddef *f) { 11241 strpc *ret = malloc(sizeof(*ret)); 11242 ret->ptr = upb_fielddef_name(f); 11243 ret->len = strlen(ret->ptr); 11244 upb_handlers_addcleanup(h, ret, free); 11245 return ret; 11246 } 11247 11248 /* ------------ JSON string printing: values, maps, arrays ------------------ */ 11249 11250 static void print_data( 11251 upb_json_printer *p, const char *buf, unsigned int len) { 11252 /* TODO: Will need to change if we support pushback from the sink. */ 11253 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL); 11254 UPB_ASSERT_VAR(n, n == len); 11255 } 11256 11257 static void print_comma(upb_json_printer *p) { 11258 if (!p->first_elem_[p->depth_]) { 11259 print_data(p, ",", 1); 11260 } 11261 p->first_elem_[p->depth_] = false; 11262 } 11263 11264 /* Helpers that print properly formatted elements to the JSON output stream. */ 11265 11266 /* Used for escaping control chars in strings. */ 11267 static const char kControlCharLimit = 0x20; 11268 11269 UPB_INLINE bool is_json_escaped(char c) { 11270 /* See RFC 4627. */ 11271 unsigned char uc = (unsigned char)c; 11272 return uc < kControlCharLimit || uc == '"' || uc == '\\'; 11273 } 11274 11275 UPB_INLINE char* json_nice_escape(char c) { 11276 switch (c) { 11277 case '"': return "\\\""; 11278 case '\\': return "\\\\"; 11279 case '\b': return "\\b"; 11280 case '\f': return "\\f"; 11281 case '\n': return "\\n"; 11282 case '\r': return "\\r"; 11283 case '\t': return "\\t"; 11284 default: return NULL; 11285 } 11286 } 11287 11288 /* Write a properly escaped string chunk. The surrounding quotes are *not* 11289 * printed; this is so that the caller has the option of emitting the string 11290 * content in chunks. 
*/ 11291 static void putstring(upb_json_printer *p, const char *buf, unsigned int len) { 11292 const char* unescaped_run = NULL; 11293 unsigned int i; 11294 for (i = 0; i < len; i++) { 11295 char c = buf[i]; 11296 /* Handle escaping. */ 11297 if (is_json_escaped(c)) { 11298 /* Use a "nice" escape, like \n, if one exists for this character. */ 11299 const char* escape = json_nice_escape(c); 11300 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style 11301 * escape. */ 11302 char escape_buf[8]; 11303 if (!escape) { 11304 unsigned char byte = (unsigned char)c; 11305 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte); 11306 escape = escape_buf; 11307 } 11308 11309 /* N.B. that we assume that the input encoding is equal to the output 11310 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we 11311 * can simply pass the bytes through. */ 11312 11313 /* If there's a current run of unescaped chars, print that run first. */ 11314 if (unescaped_run) { 11315 print_data(p, unescaped_run, &buf[i] - unescaped_run); 11316 unescaped_run = NULL; 11317 } 11318 /* Then print the escape code. */ 11319 print_data(p, escape, strlen(escape)); 11320 } else { 11321 /* Add to the current unescaped run of characters. */ 11322 if (unescaped_run == NULL) { 11323 unescaped_run = &buf[i]; 11324 } 11325 } 11326 } 11327 11328 /* If the string ended in a run of unescaped characters, print that last run. */ 11329 if (unescaped_run) { 11330 print_data(p, unescaped_run, &buf[len] - unescaped_run); 11331 } 11332 } 11333 11334 #define CHKLENGTH(x) if (!(x)) return -1; 11335 11336 /* Helpers that format floating point values according to our custom formats. 11337 * Right now we use %.8g and %.17g for float/double, respectively, to match 11338 * proto2::util::JsonFormat's defaults. May want to change this later. 
*/ 11339 11340 static size_t fmt_double(double val, char* buf, size_t length) { 11341 size_t n = _upb_snprintf(buf, length, "%.17g", val); 11342 CHKLENGTH(n > 0 && n < length); 11343 return n; 11344 } 11345 11346 static size_t fmt_float(float val, char* buf, size_t length) { 11347 size_t n = _upb_snprintf(buf, length, "%.8g", val); 11348 CHKLENGTH(n > 0 && n < length); 11349 return n; 11350 } 11351 11352 static size_t fmt_bool(bool val, char* buf, size_t length) { 11353 size_t n = _upb_snprintf(buf, length, "%s", (val ? "true" : "false")); 11354 CHKLENGTH(n > 0 && n < length); 11355 return n; 11356 } 11357 11358 static size_t fmt_int64(long val, char* buf, size_t length) { 11359 size_t n = _upb_snprintf(buf, length, "%ld", val); 11360 CHKLENGTH(n > 0 && n < length); 11361 return n; 11362 } 11363 11364 static size_t fmt_uint64(unsigned long long val, char* buf, size_t length) { 11365 size_t n = _upb_snprintf(buf, length, "%llu", val); 11366 CHKLENGTH(n > 0 && n < length); 11367 return n; 11368 } 11369 11370 /* Print a map key given a field name. Called by scalar field handlers and by 11371 * startseq for repeated fields. 
*/ 11372 static bool putkey(void *closure, const void *handler_data) { 11373 upb_json_printer *p = closure; 11374 const strpc *key = handler_data; 11375 print_comma(p); 11376 print_data(p, "\"", 1); 11377 putstring(p, key->ptr, key->len); 11378 print_data(p, "\":", 2); 11379 return true; 11380 } 11381 11382 #define CHKFMT(val) if ((val) == (size_t)-1) return false; 11383 #define CHK(val) if (!(val)) return false; 11384 11385 #define TYPE_HANDLERS(type, fmt_func) \ 11386 static bool put##type(void *closure, const void *handler_data, type val) { \ 11387 upb_json_printer *p = closure; \ 11388 char data[64]; \ 11389 size_t length = fmt_func(val, data, sizeof(data)); \ 11390 UPB_UNUSED(handler_data); \ 11391 CHKFMT(length); \ 11392 print_data(p, data, length); \ 11393 return true; \ 11394 } \ 11395 static bool scalar_##type(void *closure, const void *handler_data, \ 11396 type val) { \ 11397 CHK(putkey(closure, handler_data)); \ 11398 CHK(put##type(closure, handler_data, val)); \ 11399 return true; \ 11400 } \ 11401 static bool repeated_##type(void *closure, const void *handler_data, \ 11402 type val) { \ 11403 upb_json_printer *p = closure; \ 11404 print_comma(p); \ 11405 CHK(put##type(closure, handler_data, val)); \ 11406 return true; \ 11407 } 11408 11409 #define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ 11410 static bool putmapkey_##type(void *closure, const void *handler_data, \ 11411 type val) { \ 11412 upb_json_printer *p = closure; \ 11413 print_data(p, "\"", 1); \ 11414 CHK(put##type(closure, handler_data, val)); \ 11415 print_data(p, "\":", 2); \ 11416 return true; \ 11417 } 11418 11419 TYPE_HANDLERS(double, fmt_double) 11420 TYPE_HANDLERS(float, fmt_float) 11421 TYPE_HANDLERS(bool, fmt_bool) 11422 TYPE_HANDLERS(int32_t, fmt_int64) 11423 TYPE_HANDLERS(uint32_t, fmt_int64) 11424 TYPE_HANDLERS(int64_t, fmt_int64) 11425 TYPE_HANDLERS(uint64_t, fmt_uint64) 11426 11427 /* double and float are not allowed to be map keys. 
*/ 11428 TYPE_HANDLERS_MAPKEY(bool, fmt_bool) 11429 TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64) 11430 TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64) 11431 TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64) 11432 TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64) 11433 11434 #undef TYPE_HANDLERS 11435 #undef TYPE_HANDLERS_MAPKEY 11436 11437 typedef struct { 11438 void *keyname; 11439 const upb_enumdef *enumdef; 11440 } EnumHandlerData; 11441 11442 static bool scalar_enum(void *closure, const void *handler_data, 11443 int32_t val) { 11444 const EnumHandlerData *hd = handler_data; 11445 upb_json_printer *p = closure; 11446 const char *symbolic_name; 11447 11448 CHK(putkey(closure, hd->keyname)); 11449 11450 symbolic_name = upb_enumdef_iton(hd->enumdef, val); 11451 if (symbolic_name) { 11452 print_data(p, "\"", 1); 11453 putstring(p, symbolic_name, strlen(symbolic_name)); 11454 print_data(p, "\"", 1); 11455 } else { 11456 putint32_t(closure, NULL, val); 11457 } 11458 11459 return true; 11460 } 11461 11462 static void print_enum_symbolic_name(upb_json_printer *p, 11463 const upb_enumdef *def, 11464 int32_t val) { 11465 const char *symbolic_name = upb_enumdef_iton(def, val); 11466 if (symbolic_name) { 11467 print_data(p, "\"", 1); 11468 putstring(p, symbolic_name, strlen(symbolic_name)); 11469 print_data(p, "\"", 1); 11470 } else { 11471 putint32_t(p, NULL, val); 11472 } 11473 } 11474 11475 static bool repeated_enum(void *closure, const void *handler_data, 11476 int32_t val) { 11477 const EnumHandlerData *hd = handler_data; 11478 upb_json_printer *p = closure; 11479 print_comma(p); 11480 11481 print_enum_symbolic_name(p, hd->enumdef, val); 11482 11483 return true; 11484 } 11485 11486 static bool mapvalue_enum(void *closure, const void *handler_data, 11487 int32_t val) { 11488 const EnumHandlerData *hd = handler_data; 11489 upb_json_printer *p = closure; 11490 11491 print_enum_symbolic_name(p, hd->enumdef, val); 11492 11493 return true; 11494 } 11495 11496 static void *scalar_startsubmsg(void 
*closure, const void *handler_data) { 11497 return putkey(closure, handler_data) ? closure : UPB_BREAK; 11498 } 11499 11500 static void *repeated_startsubmsg(void *closure, const void *handler_data) { 11501 upb_json_printer *p = closure; 11502 UPB_UNUSED(handler_data); 11503 print_comma(p); 11504 return closure; 11505 } 11506 11507 static void start_frame(upb_json_printer *p) { 11508 p->depth_++; 11509 p->first_elem_[p->depth_] = true; 11510 print_data(p, "{", 1); 11511 } 11512 11513 static void end_frame(upb_json_printer *p) { 11514 print_data(p, "}", 1); 11515 p->depth_--; 11516 } 11517 11518 static bool printer_startmsg(void *closure, const void *handler_data) { 11519 upb_json_printer *p = closure; 11520 UPB_UNUSED(handler_data); 11521 if (p->depth_ == 0) { 11522 upb_bytessink_start(p->output_, 0, &p->subc_); 11523 } 11524 start_frame(p); 11525 return true; 11526 } 11527 11528 static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { 11529 upb_json_printer *p = closure; 11530 UPB_UNUSED(handler_data); 11531 UPB_UNUSED(s); 11532 end_frame(p); 11533 if (p->depth_ == 0) { 11534 upb_bytessink_end(p->output_); 11535 } 11536 return true; 11537 } 11538 11539 static void *startseq(void *closure, const void *handler_data) { 11540 upb_json_printer *p = closure; 11541 CHK(putkey(closure, handler_data)); 11542 p->depth_++; 11543 p->first_elem_[p->depth_] = true; 11544 print_data(p, "[", 1); 11545 return closure; 11546 } 11547 11548 static bool endseq(void *closure, const void *handler_data) { 11549 upb_json_printer *p = closure; 11550 UPB_UNUSED(handler_data); 11551 print_data(p, "]", 1); 11552 p->depth_--; 11553 return true; 11554 } 11555 11556 static void *startmap(void *closure, const void *handler_data) { 11557 upb_json_printer *p = closure; 11558 CHK(putkey(closure, handler_data)); 11559 p->depth_++; 11560 p->first_elem_[p->depth_] = true; 11561 print_data(p, "{", 1); 11562 return closure; 11563 } 11564 11565 static bool endmap(void *closure, 
const void *handler_data) { 11566 upb_json_printer *p = closure; 11567 UPB_UNUSED(handler_data); 11568 print_data(p, "}", 1); 11569 p->depth_--; 11570 return true; 11571 } 11572 11573 static size_t putstr(void *closure, const void *handler_data, const char *str, 11574 size_t len, const upb_bufhandle *handle) { 11575 upb_json_printer *p = closure; 11576 UPB_UNUSED(handler_data); 11577 UPB_UNUSED(handle); 11578 putstring(p, str, len); 11579 return len; 11580 } 11581 11582 /* This has to Base64 encode the bytes, because JSON has no "bytes" type. */ 11583 static size_t putbytes(void *closure, const void *handler_data, const char *str, 11584 size_t len, const upb_bufhandle *handle) { 11585 upb_json_printer *p = closure; 11586 11587 /* This is the regular base64, not the "web-safe" version. */ 11588 static const char base64[] = 11589 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 11590 11591 /* Base64-encode. */ 11592 char data[16000]; 11593 const char *limit = data + sizeof(data); 11594 const unsigned char *from = (const unsigned char*)str; 11595 char *to = data; 11596 size_t remaining = len; 11597 size_t bytes; 11598 11599 UPB_UNUSED(handler_data); 11600 UPB_UNUSED(handle); 11601 11602 while (remaining > 2) { 11603 /* TODO(haberman): handle encoded lengths > sizeof(data) */ 11604 UPB_ASSERT_VAR(limit, (limit - to) >= 4); 11605 11606 to[0] = base64[from[0] >> 2]; 11607 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; 11608 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)]; 11609 to[3] = base64[from[2] & 0x3f]; 11610 11611 remaining -= 3; 11612 to += 4; 11613 from += 3; 11614 } 11615 11616 switch (remaining) { 11617 case 2: 11618 to[0] = base64[from[0] >> 2]; 11619 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; 11620 to[2] = base64[(from[1] & 0xf) << 2]; 11621 to[3] = '='; 11622 to += 4; 11623 from += 2; 11624 break; 11625 case 1: 11626 to[0] = base64[from[0] >> 2]; 11627 to[1] = base64[((from[0] & 0x3) << 4)]; 11628 to[2] = 
'='; 11629 to[3] = '='; 11630 to += 4; 11631 from += 1; 11632 break; 11633 } 11634 11635 bytes = to - data; 11636 print_data(p, "\"", 1); 11637 putstring(p, data, bytes); 11638 print_data(p, "\"", 1); 11639 return len; 11640 } 11641 11642 static void *scalar_startstr(void *closure, const void *handler_data, 11643 size_t size_hint) { 11644 upb_json_printer *p = closure; 11645 UPB_UNUSED(handler_data); 11646 UPB_UNUSED(size_hint); 11647 CHK(putkey(closure, handler_data)); 11648 print_data(p, "\"", 1); 11649 return p; 11650 } 11651 11652 static size_t scalar_str(void *closure, const void *handler_data, 11653 const char *str, size_t len, 11654 const upb_bufhandle *handle) { 11655 CHK(putstr(closure, handler_data, str, len, handle)); 11656 return len; 11657 } 11658 11659 static bool scalar_endstr(void *closure, const void *handler_data) { 11660 upb_json_printer *p = closure; 11661 UPB_UNUSED(handler_data); 11662 print_data(p, "\"", 1); 11663 return true; 11664 } 11665 11666 static void *repeated_startstr(void *closure, const void *handler_data, 11667 size_t size_hint) { 11668 upb_json_printer *p = closure; 11669 UPB_UNUSED(handler_data); 11670 UPB_UNUSED(size_hint); 11671 print_comma(p); 11672 print_data(p, "\"", 1); 11673 return p; 11674 } 11675 11676 static size_t repeated_str(void *closure, const void *handler_data, 11677 const char *str, size_t len, 11678 const upb_bufhandle *handle) { 11679 CHK(putstr(closure, handler_data, str, len, handle)); 11680 return len; 11681 } 11682 11683 static bool repeated_endstr(void *closure, const void *handler_data) { 11684 upb_json_printer *p = closure; 11685 UPB_UNUSED(handler_data); 11686 print_data(p, "\"", 1); 11687 return true; 11688 } 11689 11690 static void *mapkeyval_startstr(void *closure, const void *handler_data, 11691 size_t size_hint) { 11692 upb_json_printer *p = closure; 11693 UPB_UNUSED(handler_data); 11694 UPB_UNUSED(size_hint); 11695 print_data(p, "\"", 1); 11696 return p; 11697 } 11698 11699 static size_t 
mapkey_str(void *closure, const void *handler_data, 11700 const char *str, size_t len, 11701 const upb_bufhandle *handle) { 11702 CHK(putstr(closure, handler_data, str, len, handle)); 11703 return len; 11704 } 11705 11706 static bool mapkey_endstr(void *closure, const void *handler_data) { 11707 upb_json_printer *p = closure; 11708 UPB_UNUSED(handler_data); 11709 print_data(p, "\":", 2); 11710 return true; 11711 } 11712 11713 static bool mapvalue_endstr(void *closure, const void *handler_data) { 11714 upb_json_printer *p = closure; 11715 UPB_UNUSED(handler_data); 11716 print_data(p, "\"", 1); 11717 return true; 11718 } 11719 11720 static size_t scalar_bytes(void *closure, const void *handler_data, 11721 const char *str, size_t len, 11722 const upb_bufhandle *handle) { 11723 CHK(putkey(closure, handler_data)); 11724 CHK(putbytes(closure, handler_data, str, len, handle)); 11725 return len; 11726 } 11727 11728 static size_t repeated_bytes(void *closure, const void *handler_data, 11729 const char *str, size_t len, 11730 const upb_bufhandle *handle) { 11731 upb_json_printer *p = closure; 11732 print_comma(p); 11733 CHK(putbytes(closure, handler_data, str, len, handle)); 11734 return len; 11735 } 11736 11737 static size_t mapkey_bytes(void *closure, const void *handler_data, 11738 const char *str, size_t len, 11739 const upb_bufhandle *handle) { 11740 upb_json_printer *p = closure; 11741 CHK(putbytes(closure, handler_data, str, len, handle)); 11742 print_data(p, ":", 1); 11743 return len; 11744 } 11745 11746 static void set_enum_hd(upb_handlers *h, 11747 const upb_fielddef *f, 11748 upb_handlerattr *attr) { 11749 EnumHandlerData *hd = malloc(sizeof(EnumHandlerData)); 11750 hd->enumdef = (const upb_enumdef *)upb_fielddef_subdef(f); 11751 hd->keyname = newstrpc(h, f); 11752 upb_handlers_addcleanup(h, hd, free); 11753 upb_handlerattr_sethandlerdata(attr, hd); 11754 } 11755 11756 /* Set up handlers for a mapentry submessage (i.e., an individual key/value pair 11757 * in a 
map). 11758 * 11759 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated 11760 * key or value cases properly. The right way to do this is to allocate a 11761 * temporary structure at the start of a mapentry submessage, store key and 11762 * value data in it as key and value handlers are called, and then print the 11763 * key/value pair once at the end of the submessage. If we don't do this, we 11764 * should at least detect the case and throw an error. However, so far all of 11765 * our sources that emit mapentry messages do so canonically (with one key 11766 * field, and then one value field), so this is not a pressing concern at the 11767 * moment. */ 11768 void printer_sethandlers_mapentry(const void *closure, upb_handlers *h) { 11769 const upb_msgdef *md = upb_handlers_msgdef(h); 11770 11771 /* A mapentry message is printed simply as '"key": value'. Rather than 11772 * special-case key and value for every type below, we just handle both 11773 * fields explicitly here. 
*/ 11774 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); 11775 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); 11776 11777 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; 11778 11779 UPB_UNUSED(closure); 11780 11781 switch (upb_fielddef_type(key_field)) { 11782 case UPB_TYPE_INT32: 11783 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); 11784 break; 11785 case UPB_TYPE_INT64: 11786 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); 11787 break; 11788 case UPB_TYPE_UINT32: 11789 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); 11790 break; 11791 case UPB_TYPE_UINT64: 11792 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); 11793 break; 11794 case UPB_TYPE_BOOL: 11795 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); 11796 break; 11797 case UPB_TYPE_STRING: 11798 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); 11799 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); 11800 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); 11801 break; 11802 case UPB_TYPE_BYTES: 11803 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); 11804 break; 11805 default: 11806 assert(false); 11807 break; 11808 } 11809 11810 switch (upb_fielddef_type(value_field)) { 11811 case UPB_TYPE_INT32: 11812 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); 11813 break; 11814 case UPB_TYPE_INT64: 11815 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); 11816 break; 11817 case UPB_TYPE_UINT32: 11818 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); 11819 break; 11820 case UPB_TYPE_UINT64: 11821 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); 11822 break; 11823 case UPB_TYPE_BOOL: 11824 upb_handlers_setbool(h, value_field, putbool, &empty_attr); 11825 break; 11826 case UPB_TYPE_FLOAT: 11827 upb_handlers_setfloat(h, value_field, 
putfloat, &empty_attr); 11828 break; 11829 case UPB_TYPE_DOUBLE: 11830 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); 11831 break; 11832 case UPB_TYPE_STRING: 11833 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); 11834 upb_handlers_setstring(h, value_field, putstr, &empty_attr); 11835 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); 11836 break; 11837 case UPB_TYPE_BYTES: 11838 upb_handlers_setstring(h, value_field, putbytes, &empty_attr); 11839 break; 11840 case UPB_TYPE_ENUM: { 11841 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; 11842 set_enum_hd(h, value_field, &enum_attr); 11843 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); 11844 upb_handlerattr_uninit(&enum_attr); 11845 break; 11846 } 11847 case UPB_TYPE_MESSAGE: 11848 /* No handler necessary -- the submsg handlers will print the message 11849 * as appropriate. */ 11850 break; 11851 } 11852 11853 upb_handlerattr_uninit(&empty_attr); 11854 } 11855 11856 void printer_sethandlers(const void *closure, upb_handlers *h) { 11857 const upb_msgdef *md = upb_handlers_msgdef(h); 11858 bool is_mapentry = upb_msgdef_mapentry(md); 11859 upb_handlerattr empty_attr = UPB_HANDLERATTR_INITIALIZER; 11860 upb_msg_field_iter i; 11861 11862 UPB_UNUSED(closure); 11863 11864 if (is_mapentry) { 11865 /* mapentry messages are sufficiently different that we handle them 11866 * separately. 
*/ 11867 printer_sethandlers_mapentry(closure, h); 11868 return; 11869 } 11870 11871 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); 11872 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); 11873 11874 #define TYPE(type, name, ctype) \ 11875 case type: \ 11876 if (upb_fielddef_isseq(f)) { \ 11877 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ 11878 } else { \ 11879 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ 11880 } \ 11881 break; 11882 11883 upb_msg_field_begin(&i, md); 11884 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { 11885 const upb_fielddef *f = upb_msg_iter_field(&i); 11886 11887 upb_handlerattr name_attr = UPB_HANDLERATTR_INITIALIZER; 11888 upb_handlerattr_sethandlerdata(&name_attr, newstrpc(h, f)); 11889 11890 if (upb_fielddef_ismap(f)) { 11891 upb_handlers_setstartseq(h, f, startmap, &name_attr); 11892 upb_handlers_setendseq(h, f, endmap, &name_attr); 11893 } else if (upb_fielddef_isseq(f)) { 11894 upb_handlers_setstartseq(h, f, startseq, &name_attr); 11895 upb_handlers_setendseq(h, f, endseq, &empty_attr); 11896 } 11897 11898 switch (upb_fielddef_type(f)) { 11899 TYPE(UPB_TYPE_FLOAT, float, float); 11900 TYPE(UPB_TYPE_DOUBLE, double, double); 11901 TYPE(UPB_TYPE_BOOL, bool, bool); 11902 TYPE(UPB_TYPE_INT32, int32, int32_t); 11903 TYPE(UPB_TYPE_UINT32, uint32, uint32_t); 11904 TYPE(UPB_TYPE_INT64, int64, int64_t); 11905 TYPE(UPB_TYPE_UINT64, uint64, uint64_t); 11906 case UPB_TYPE_ENUM: { 11907 /* For now, we always emit symbolic names for enums. We may want an 11908 * option later to control this behavior, but we will wait for a real 11909 * need first. 
*/ 11910 upb_handlerattr enum_attr = UPB_HANDLERATTR_INITIALIZER; 11911 set_enum_hd(h, f, &enum_attr); 11912 11913 if (upb_fielddef_isseq(f)) { 11914 upb_handlers_setint32(h, f, repeated_enum, &enum_attr); 11915 } else { 11916 upb_handlers_setint32(h, f, scalar_enum, &enum_attr); 11917 } 11918 11919 upb_handlerattr_uninit(&enum_attr); 11920 break; 11921 } 11922 case UPB_TYPE_STRING: 11923 if (upb_fielddef_isseq(f)) { 11924 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); 11925 upb_handlers_setstring(h, f, repeated_str, &empty_attr); 11926 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); 11927 } else { 11928 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); 11929 upb_handlers_setstring(h, f, scalar_str, &empty_attr); 11930 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); 11931 } 11932 break; 11933 case UPB_TYPE_BYTES: 11934 /* XXX: this doesn't support strings that span buffers yet. The base64 11935 * encoder will need to be made resumable for this to work properly. 
*/ 11936 if (upb_fielddef_isseq(f)) { 11937 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); 11938 } else { 11939 upb_handlers_setstring(h, f, scalar_bytes, &name_attr); 11940 } 11941 break; 11942 case UPB_TYPE_MESSAGE: 11943 if (upb_fielddef_isseq(f)) { 11944 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr); 11945 } else { 11946 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr); 11947 } 11948 break; 11949 } 11950 11951 upb_handlerattr_uninit(&name_attr); 11952 } 11953 11954 upb_handlerattr_uninit(&empty_attr); 11955 #undef TYPE 11956 } 11957 11958 static void json_printer_reset(upb_json_printer *p) { 11959 p->depth_ = 0; 11960 } 11961 11962 11963 /* Public API *****************************************************************/ 11964 11965 upb_json_printer *upb_json_printer_create(upb_env *e, const upb_handlers *h, 11966 upb_bytessink *output) { 11967 #ifndef NDEBUG 11968 size_t size_before = upb_env_bytesallocated(e); 11969 #endif 11970 11971 upb_json_printer *p = upb_env_malloc(e, sizeof(upb_json_printer)); 11972 if (!p) return NULL; 11973 11974 p->output_ = output; 11975 json_printer_reset(p); 11976 upb_sink_reset(&p->input_, h, p); 11977 11978 /* If this fails, increase the value in printer.h. */ 11979 assert(upb_env_bytesallocated(e) - size_before <= UPB_JSON_PRINTER_SIZE); 11980 return p; 11981 } 11982 11983 upb_sink *upb_json_printer_input(upb_json_printer *p) { 11984 return &p->input_; 11985 } 11986 11987 const upb_handlers *upb_json_printer_newhandlers(const upb_msgdef *md, 11988 const void *owner) { 11989 return upb_handlers_newfrozen(md, owner, printer_sethandlers, NULL); 11990 } 11991