1 #include <stdint.h> 2 #include <protobuf.h> 3 4 // ----------------------------------------------------------------------------- 5 // PHP <-> native slot management. 6 // ----------------------------------------------------------------------------- 7 8 static zval* int32_to_zval(int32_t value) { 9 zval* tmp; 10 MAKE_STD_ZVAL(tmp); 11 ZVAL_LONG(tmp, value); 12 php_printf("int32 to zval\n"); 13 // ZVAL_LONG(tmp, 1); 14 return tmp; 15 } 16 17 #define DEREF(memory, type) *(type*)(memory) 18 19 size_t native_slot_size(upb_fieldtype_t type) { 20 switch (type) { 21 case UPB_TYPE_FLOAT: return 4; 22 case UPB_TYPE_DOUBLE: return 8; 23 case UPB_TYPE_BOOL: return 1; 24 case UPB_TYPE_STRING: return sizeof(zval*); 25 case UPB_TYPE_BYTES: return sizeof(zval*); 26 case UPB_TYPE_MESSAGE: return sizeof(zval*); 27 case UPB_TYPE_ENUM: return 4; 28 case UPB_TYPE_INT32: return 4; 29 case UPB_TYPE_INT64: return 8; 30 case UPB_TYPE_UINT32: return 4; 31 case UPB_TYPE_UINT64: return 8; 32 default: return 0; 33 } 34 } 35 36 static bool is_php_num(zval* value) { 37 // Is numerial string also valid? 38 return (Z_TYPE_P(value) == IS_LONG || 39 Z_TYPE_P(value) == IS_DOUBLE); 40 } 41 42 void native_slot_check_int_range_precision(upb_fieldtype_t type, zval* val) { 43 // TODO(teboring): Add it back. 44 // if (!is_php_num(val)) { 45 // zend_error(E_ERROR, "Expected number type for integral field."); 46 // } 47 48 // if (Z_TYPE_P(val) == IS_DOUBLE) { 49 // double dbl_val = NUM2DBL(val); 50 // if (floor(dbl_val) != dbl_val) { 51 // zend_error(E_ERROR, 52 // "Non-integral floating point value assigned to integer field."); 53 // } 54 // } 55 // if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) { 56 // if (NUM2DBL(val) < 0) { 57 // zend_error(E_ERROR, 58 // "Assigning negative value to unsigned integer field."); 59 // } 60 // } 61 } 62 63 zval* native_slot_get(upb_fieldtype_t type, /*VALUE type_class,*/ 64 const void* memory TSRMLS_DC) { 65 zval* retval = NULL; 66 switch (type) { 67 // TODO(teboring): Add it back. 68 // case UPB_TYPE_FLOAT: 69 // return DBL2NUM(DEREF(memory, float)); 70 // case UPB_TYPE_DOUBLE: 71 // return DBL2NUM(DEREF(memory, double)); 72 // case UPB_TYPE_BOOL: 73 // return DEREF(memory, int8_t) ? Qtrue : Qfalse; 74 // case UPB_TYPE_STRING: 75 // case UPB_TYPE_BYTES: 76 // case UPB_TYPE_MESSAGE: 77 // return DEREF(memory, VALUE); 78 // case UPB_TYPE_ENUM: { 79 // int32_t val = DEREF(memory, int32_t); 80 // VALUE symbol = enum_lookup(type_class, INT2NUM(val)); 81 // if (symbol == Qnil) { 82 // return INT2NUM(val); 83 // } else { 84 // return symbol; 85 // } 86 // } 87 case UPB_TYPE_INT32: 88 return int32_to_zval(DEREF(memory, int32_t)); 89 // TODO(teboring): Add it back. 90 // case UPB_TYPE_INT64: 91 // return LL2NUM(DEREF(memory, int64_t)); 92 // case UPB_TYPE_UINT32: 93 // return UINT2NUM(DEREF(memory, uint32_t)); 94 // case UPB_TYPE_UINT64: 95 // return ULL2NUM(DEREF(memory, uint64_t)); 96 default: 97 return EG(uninitialized_zval_ptr); 98 } 99 } 100 101 void native_slot_init(upb_fieldtype_t type, void* memory) { 102 switch (type) { 103 case UPB_TYPE_FLOAT: 104 DEREF(memory, float) = 0.0; 105 break; 106 case UPB_TYPE_DOUBLE: 107 DEREF(memory, double) = 0.0; 108 break; 109 case UPB_TYPE_BOOL: 110 DEREF(memory, int8_t) = 0; 111 break; 112 // TODO(teboring): Add it back. 113 // case UPB_TYPE_STRING: 114 // case UPB_TYPE_BYTES: 115 // DEREF(memory, VALUE) = php_str_new2(""); 116 // php_enc_associate(DEREF(memory, VALUE), (type == UPB_TYPE_BYTES) 117 // ? kRubyString8bitEncoding 118 // : kRubyStringUtf8Encoding); 119 // break; 120 // case UPB_TYPE_MESSAGE: 121 // DEREF(memory, VALUE) = Qnil; 122 // break; 123 case UPB_TYPE_ENUM: 124 case UPB_TYPE_INT32: 125 DEREF(memory, int32_t) = 0; 126 break; 127 case UPB_TYPE_INT64: 128 DEREF(memory, int64_t) = 0; 129 break; 130 case UPB_TYPE_UINT32: 131 DEREF(memory, uint32_t) = 0; 132 break; 133 case UPB_TYPE_UINT64: 134 DEREF(memory, uint64_t) = 0; 135 break; 136 default: 137 break; 138 } 139 } 140 141 void native_slot_set(upb_fieldtype_t type, /*VALUE type_class,*/ void* memory, 142 zval* value) { 143 native_slot_set_value_and_case(type, /*type_class,*/ memory, value, NULL, 0); 144 } 145 146 void native_slot_set_value_and_case(upb_fieldtype_t type, /*VALUE type_class,*/ 147 void* memory, zval* value, 148 uint32_t* case_memory, 149 uint32_t case_number) { 150 switch (type) { 151 case UPB_TYPE_FLOAT: 152 if (!Z_TYPE_P(value) == IS_LONG) { 153 zend_error(E_ERROR, "Expected number type for float field."); 154 } 155 DEREF(memory, float) = Z_DVAL_P(value); 156 break; 157 case UPB_TYPE_DOUBLE: 158 // TODO(teboring): Add it back. 159 // if (!is_php_num(value)) { 160 // zend_error(E_ERROR, "Expected number type for double field."); 161 // } 162 // DEREF(memory, double) = Z_DVAL_P(value); 163 break; 164 case UPB_TYPE_BOOL: { 165 int8_t val = -1; 166 if (zval_is_true(value)) { 167 val = 1; 168 } else { 169 val = 0; 170 } 171 // TODO(teboring): Add it back. 172 // else if (value == Qfalse) { 173 // val = 0; 174 // } 175 // else { 176 // php_raise(php_eTypeError, "Invalid argument for boolean field."); 177 // } 178 DEREF(memory, int8_t) = val; 179 break; 180 } 181 case UPB_TYPE_STRING: 182 case UPB_TYPE_BYTES: { 183 // TODO(teboring): Add it back. 184 // if (Z_TYPE_P(value) != IS_STRING) { 185 // zend_error(E_ERROR, "Invalid argument for string field."); 186 // } 187 // native_slot_validate_string_encoding(type, value); 188 // DEREF(memory, zval*) = value; 189 break; 190 } 191 case UPB_TYPE_MESSAGE: { 192 // TODO(teboring): Add it back. 193 // if (CLASS_OF(value) == CLASS_OF(Qnil)) { 194 // value = Qnil; 195 // } else if (CLASS_OF(value) != type_class) { 196 // php_raise(php_eTypeError, 197 // "Invalid type %s to assign to submessage field.", 198 // php_class2name(CLASS_OF(value))); 199 // } 200 // DEREF(memory, VALUE) = value; 201 break; 202 } 203 case UPB_TYPE_ENUM: { 204 // TODO(teboring): Add it back. 205 // int32_t int_val = 0; 206 // if (!is_php_num(value) && TYPE(value) != T_SYMBOL) { 207 // php_raise(php_eTypeError, 208 // "Expected number or symbol type for enum field."); 209 // } 210 // if (TYPE(value) == T_SYMBOL) { 211 // // Ensure that the given symbol exists in the enum module. 212 // VALUE lookup = php_funcall(type_class, php_intern("resolve"), 1, value); 213 // if (lookup == Qnil) { 214 // php_raise(php_eRangeError, "Unknown symbol value for enum field."); 215 // } else { 216 // int_val = NUM2INT(lookup); 217 // } 218 // } else { 219 // native_slot_check_int_range_precision(UPB_TYPE_INT32, value); 220 // int_val = NUM2INT(value); 221 // } 222 // DEREF(memory, int32_t) = int_val; 223 // break; 224 } 225 case UPB_TYPE_INT32: 226 case UPB_TYPE_INT64: 227 case UPB_TYPE_UINT32: 228 case UPB_TYPE_UINT64: 229 native_slot_check_int_range_precision(type, value); 230 switch (type) { 231 case UPB_TYPE_INT32: 232 php_printf("Setting INT32 field\n"); 233 DEREF(memory, int32_t) = Z_LVAL_P(value); 234 break; 235 case UPB_TYPE_INT64: 236 // TODO(teboring): Add it back. 237 // DEREF(memory, int64_t) = NUM2LL(value); 238 break; 239 case UPB_TYPE_UINT32: 240 // TODO(teboring): Add it back. 241 // DEREF(memory, uint32_t) = NUM2UINT(value); 242 break; 243 case UPB_TYPE_UINT64: 244 // TODO(teboring): Add it back. 245 // DEREF(memory, uint64_t) = NUM2ULL(value); 246 break; 247 default: 248 break; 249 } 250 break; 251 default: 252 break; 253 } 254 255 if (case_memory != NULL) { 256 *case_memory = case_number; 257 } 258 } 259 260 // ----------------------------------------------------------------------------- 261 // Map field utilities. 262 // ---------------------------------------------------------------------------- 263 264 const upb_msgdef* tryget_map_entry_msgdef(const upb_fielddef* field) { 265 const upb_msgdef* subdef; 266 if (upb_fielddef_label(field) != UPB_LABEL_REPEATED || 267 upb_fielddef_type(field) != UPB_TYPE_MESSAGE) { 268 return NULL; 269 } 270 subdef = upb_fielddef_msgsubdef(field); 271 return upb_msgdef_mapentry(subdef) ? subdef : NULL; 272 } 273 274 const upb_msgdef* map_entry_msgdef(const upb_fielddef* field) { 275 const upb_msgdef* subdef = tryget_map_entry_msgdef(field); 276 assert(subdef); 277 return subdef; 278 } 279 280 bool is_map_field(const upb_fielddef* field) { 281 return tryget_map_entry_msgdef(field) != NULL; 282 } 283 284 // ----------------------------------------------------------------------------- 285 // Memory layout management. 286 // ----------------------------------------------------------------------------- 287 288 static size_t align_up_to(size_t offset, size_t granularity) { 289 // Granularity must be a power of two. 290 return (offset + granularity - 1) & ~(granularity - 1); 291 } 292 293 MessageLayout* create_layout(const upb_msgdef* msgdef) { 294 MessageLayout* layout = ALLOC(MessageLayout); 295 int nfields = upb_msgdef_numfields(msgdef); 296 upb_msg_field_iter it; 297 upb_msg_oneof_iter oit; 298 size_t off = 0; 299 300 layout->fields = ALLOC_N(MessageField, nfields); 301 302 for (upb_msg_field_begin(&it, msgdef); !upb_msg_field_done(&it); 303 upb_msg_field_next(&it)) { 304 const upb_fielddef* field = upb_msg_iter_field(&it); 305 size_t field_size; 306 307 if (upb_fielddef_containingoneof(field)) { 308 // Oneofs are handled separately below. 309 continue; 310 } 311 312 // Allocate |field_size| bytes for this field in the layout. 313 field_size = 0; 314 if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { 315 field_size = sizeof(zval*); 316 } else { 317 field_size = native_slot_size(upb_fielddef_type(field)); 318 } 319 320 // Align current offset up to | size | granularity. 321 off = align_up_to(off, field_size); 322 layout->fields[upb_fielddef_index(field)].offset = off; 323 layout->fields[upb_fielddef_index(field)].case_offset = 324 MESSAGE_FIELD_NO_CASE; 325 off += field_size; 326 } 327 328 // Handle oneofs now -- we iterate over oneofs specifically and allocate only 329 // one slot per oneof. 330 // 331 // We assign all value slots first, then pack the 'case' fields at the end, 332 // since in the common case (modern 64-bit platform) these are 8 bytes and 4 333 // bytes respectively and we want to avoid alignment overhead. 334 // 335 // Note that we reserve 4 bytes (a uint32) per 'case' slot because the value 336 // space for oneof cases is conceptually as wide as field tag numbers. In 337 // practice, it's unlikely that a oneof would have more than e.g. 256 or 64K 338 // members (8 or 16 bits respectively), so conceivably we could assign 339 // consecutive case numbers and then pick a smaller oneof case slot size, but 340 // the complexity to implement this indirection is probably not worthwhile. 341 for (upb_msg_oneof_begin(&oit, msgdef); !upb_msg_oneof_done(&oit); 342 upb_msg_oneof_next(&oit)) { 343 const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); 344 upb_oneof_iter fit; 345 346 // Always allocate NATIVE_SLOT_MAX_SIZE bytes, but share the slot between 347 // all fields. 348 size_t field_size = NATIVE_SLOT_MAX_SIZE; 349 // Align the offset . 350 off = align_up_to( off, field_size); 351 // Assign all fields in the oneof this same offset. 352 for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); 353 upb_oneof_next(&fit)) { 354 const upb_fielddef* field = upb_oneof_iter_field(&fit); 355 layout->fields[upb_fielddef_index(field)].offset = off; 356 } 357 off += field_size; 358 } 359 360 // Now the case fields. 361 for (upb_msg_oneof_begin(&oit, msgdef); !upb_msg_oneof_done(&oit); 362 upb_msg_oneof_next(&oit)) { 363 const upb_oneofdef* oneof = upb_msg_iter_oneof(&oit); 364 upb_oneof_iter fit; 365 366 size_t field_size = sizeof(uint32_t); 367 // Align the offset . 368 off = (off + field_size - 1) & ~(field_size - 1); 369 // Assign all fields in the oneof this same offset. 370 for (upb_oneof_begin(&fit, oneof); !upb_oneof_done(&fit); 371 upb_oneof_next(&fit)) { 372 const upb_fielddef* field = upb_oneof_iter_field(&fit); 373 layout->fields[upb_fielddef_index(field)].case_offset = off; 374 } 375 off += field_size; 376 } 377 378 layout->size = off; 379 380 layout->msgdef = msgdef; 381 upb_msgdef_ref(layout->msgdef, &layout->msgdef); 382 383 return layout; 384 } 385 386 void free_layout(MessageLayout* layout) { 387 FREE(layout->fields); 388 upb_msgdef_unref(layout->msgdef, &layout->msgdef); 389 FREE(layout); 390 } 391 392 // TODO(teboring): Add it back. 393 // VALUE field_type_class(const upb_fielddef* field) { 394 // VALUE type_class = Qnil; 395 // if (upb_fielddef_type(field) == UPB_TYPE_MESSAGE) { 396 // VALUE submsgdesc = get_def_obj(upb_fielddef_subdef(field)); 397 // type_class = Descriptor_msgclass(submsgdesc); 398 // } else if (upb_fielddef_type(field) == UPB_TYPE_ENUM) { 399 // VALUE subenumdesc = get_def_obj(upb_fielddef_subdef(field)); 400 // type_class = EnumDescriptor_enummodule(subenumdesc); 401 // } 402 // return type_class; 403 // } 404 405 static void* slot_memory(MessageLayout* layout, const void* storage, 406 const upb_fielddef* field) { 407 return ((uint8_t*)storage) + layout->fields[upb_fielddef_index(field)].offset; 408 } 409 410 static uint32_t* slot_oneof_case(MessageLayout* layout, const void* storage, 411 const upb_fielddef* field) { 412 return (uint32_t*)(((uint8_t*)storage) + 413 layout->fields[upb_fielddef_index(field)].case_offset); 414 } 415 416 void layout_set(MessageLayout* layout, void* storage, const upb_fielddef* field, 417 zval* val) { 418 void* memory = slot_memory(layout, storage, field); 419 uint32_t* oneof_case = slot_oneof_case(layout, storage, field); 420 421 if (upb_fielddef_containingoneof(field)) { 422 if (Z_TYPE_P(val) == IS_NULL) { 423 // Assigning nil to a oneof field clears the oneof completely. 424 *oneof_case = ONEOF_CASE_NONE; 425 memset(memory, 0, NATIVE_SLOT_MAX_SIZE); 426 } else { 427 // The transition between field types for a single oneof (union) slot is 428 // somewhat complex because we need to ensure that a GC triggered at any 429 // point by a call into the Ruby VM sees a valid state for this field and 430 // does not either go off into the weeds (following what it thinks is a 431 // VALUE but is actually a different field type) or miss an object (seeing 432 // what it thinks is a primitive field but is actually a VALUE for the new 433 // field type). 434 // 435 // In order for the transition to be safe, the oneof case slot must be in 436 // sync with the value slot whenever the Ruby VM has been called. Thus, we 437 // use native_slot_set_value_and_case(), which ensures that both the value 438 // and case number are altered atomically (w.r.t. the Ruby VM). 439 native_slot_set_value_and_case(upb_fielddef_type(field), 440 /*field_type_class(field),*/ memory, val, 441 oneof_case, upb_fielddef_number(field)); 442 } 443 } else if (is_map_field(field)) { 444 // TODO(teboring): Add it back. 445 // check_map_field_type(val, field); 446 // DEREF(memory, zval*) = val; 447 } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { 448 // TODO(teboring): Add it back. 449 // check_repeated_field_type(val, field); 450 // DEREF(memory, zval*) = val; 451 } else { 452 native_slot_set(upb_fielddef_type(field), /*field_type_class(field),*/ memory, 453 val); 454 } 455 } 456 457 void layout_init(MessageLayout* layout, void* storage) { 458 upb_msg_field_iter it; 459 for (upb_msg_field_begin(&it, layout->msgdef); !upb_msg_field_done(&it); 460 upb_msg_field_next(&it)) { 461 const upb_fielddef* field = upb_msg_iter_field(&it); 462 void* memory = slot_memory(layout, storage, field); 463 uint32_t* oneof_case = slot_oneof_case(layout, storage, field); 464 465 if (upb_fielddef_containingoneof(field)) { 466 // TODO(teboring): Add it back. 467 // memset(memory, 0, NATIVE_SLOT_MAX_SIZE); 468 // *oneof_case = ONEOF_CASE_NONE; 469 } else if (is_map_field(field)) { 470 // TODO(teboring): Add it back. 471 // VALUE map = Qnil; 472 473 // const upb_fielddef* key_field = map_field_key(field); 474 // const upb_fielddef* value_field = map_field_value(field); 475 // VALUE type_class = field_type_class(value_field); 476 477 // if (type_class != Qnil) { 478 // VALUE args[3] = { 479 // fieldtype_to_php(upb_fielddef_type(key_field)), 480 // fieldtype_to_php(upb_fielddef_type(value_field)), type_class, 481 // }; 482 // map = php_class_new_instance(3, args, cMap); 483 // } else { 484 // VALUE args[2] = { 485 // fieldtype_to_php(upb_fielddef_type(key_field)), 486 // fieldtype_to_php(upb_fielddef_type(value_field)), 487 // }; 488 // map = php_class_new_instance(2, args, cMap); 489 // } 490 491 // DEREF(memory, VALUE) = map; 492 } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { 493 // TODO(teboring): Add it back. 494 // VALUE ary = Qnil; 495 496 // VALUE type_class = field_type_class(field); 497 498 // if (type_class != Qnil) { 499 // VALUE args[2] = { 500 // fieldtype_to_php(upb_fielddef_type(field)), type_class, 501 // }; 502 // ary = php_class_new_instance(2, args, cRepeatedField); 503 // } else { 504 // VALUE args[1] = {fieldtype_to_php(upb_fielddef_type(field))}; 505 // ary = php_class_new_instance(1, args, cRepeatedField); 506 // } 507 508 // DEREF(memory, VALUE) = ary; 509 } else { 510 native_slot_init(upb_fielddef_type(field), memory); 511 } 512 } 513 } 514 515 zval* layout_get(MessageLayout* layout, const void* storage, 516 const upb_fielddef* field TSRMLS_DC) { 517 void* memory = slot_memory(layout, storage, field); 518 uint32_t* oneof_case = slot_oneof_case(layout, storage, field); 519 520 if (upb_fielddef_containingoneof(field)) { 521 if (*oneof_case != upb_fielddef_number(field)) { 522 return NULL; 523 // TODO(teboring): Add it back. 524 // return Qnil; 525 } 526 return NULL; 527 // TODO(teboring): Add it back. 528 // return native_slot_get(upb_fielddef_type(field), field_type_class(field), 529 // memory); 530 } else if (upb_fielddef_label(field) == UPB_LABEL_REPEATED) { 531 return NULL; 532 // TODO(teboring): Add it back. 533 // return *((VALUE*)memory); 534 } else { 535 return native_slot_get( 536 upb_fielddef_type(field), /*field_type_class(field), */ 537 memory TSRMLS_CC); 538 } 539 } 540