1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "slicer/reader.h" 18 #include "slicer/dex_bytecode.h" 19 #include "slicer/chronometer.h" 20 #include "slicer/dex_leb128.h" 21 22 #include <assert.h> 23 #include <string.h> 24 #include <type_traits> 25 #include <cstdlib> 26 27 namespace dex { 28 29 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) { 30 // init the header reference 31 header_ = ptr<dex::Header>(0); 32 ValidateHeader(); 33 34 // start with an "empty" .dex IR 35 dex_ir_ = std::make_shared<ir::DexFile>(); 36 dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic)); 37 } 38 39 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const { 40 return section<dex::ClassDef>(header_->class_defs_off, 41 header_->class_defs_size); 42 } 43 44 slicer::ArrayView<const dex::StringId> Reader::StringIds() const { 45 return section<dex::StringId>(header_->string_ids_off, 46 header_->string_ids_size); 47 } 48 49 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const { 50 return section<dex::TypeId>(header_->type_ids_off, 51 header_->type_ids_size); 52 } 53 54 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const { 55 return section<dex::FieldId>(header_->field_ids_off, 56 header_->field_ids_size); 57 } 58 59 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const { 60 return section<dex::MethodId>(header_->method_ids_off, 61 header_->method_ids_size); 62 } 63 64 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const { 65 return section<dex::ProtoId>(header_->proto_ids_off, 66 header_->proto_ids_size); 67 } 68 69 const dex::MapList* Reader::DexMapList() const { 70 return dataPtr<dex::MapList>(header_->map_off); 71 } 72 73 const char* Reader::GetStringMUTF8(dex::u4 index) const { 74 if (index == dex::kNoIndex) { 75 return "<no_string>"; 76 } 77 const dex::u1* strData = GetStringData(index); 78 dex::ReadULeb128(&strData); 79 return reinterpret_cast<const char*>(strData); 80 } 81 82 void Reader::CreateFullIr() { 83 size_t classCount = ClassDefs().size(); 84 for (size_t i = 0; i < classCount; ++i) { 85 CreateClassIr(i); 86 } 87 } 88 89 void Reader::CreateClassIr(dex::u4 index) { 90 auto ir_class = GetClass(index); 91 SLICER_CHECK(ir_class != nullptr); 92 } 93 94 // Returns the index of the class with the specified 95 // descriptor, or kNoIndex if not found 96 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const { 97 auto classes = ClassDefs(); 98 auto types = TypeIds(); 99 for (dex::u4 i = 0; i < classes.size(); ++i) { 100 auto typeId = types[classes[i].class_idx]; 101 const char* descriptor = GetStringMUTF8(typeId.descriptor_idx); 102 if (strcmp(class_descriptor, descriptor) == 0) { 103 return i; 104 } 105 } 106 return dex::kNoIndex; 107 } 108 109 // map a .dex index to corresponding .dex IR node 110 // 111 // NOTES: 112 // 1. the mapping beween an index and the indexed 113 // .dex IR nodes is 1:1 114 // 2. we do a single index lookup for both existing 115 // nodes as well as new nodes 116 // 3. dummy is an invalid, but non-null pointer value 117 // used to check that the mapping loookup/update is atomic 118 // 4. there should be no recursion with the same index 119 // (we use the dummy value to guard against this too) 120 // 121 ir::Class* Reader::GetClass(dex::u4 index) { 122 SLICER_CHECK(index != dex::kNoIndex); 123 auto& p = dex_ir_->classes_map[index]; 124 auto dummy = reinterpret_cast<ir::Class*>(1); 125 if (p == nullptr) { 126 p = dummy; 127 auto newClass = ParseClass(index); 128 SLICER_CHECK(p == dummy); 129 p = newClass; 130 dex_ir_->classes_indexes.MarkUsedIndex(index); 131 } 132 SLICER_CHECK(p != dummy); 133 return p; 134 } 135 136 // map a .dex index to corresponding .dex IR node 137 // (see the Reader::GetClass() comments) 138 ir::Type* Reader::GetType(dex::u4 index) { 139 SLICER_CHECK(index != dex::kNoIndex); 140 auto& p = dex_ir_->types_map[index]; 141 auto dummy = reinterpret_cast<ir::Type*>(1); 142 if (p == nullptr) { 143 p = dummy; 144 auto newType = ParseType(index); 145 SLICER_CHECK(p == dummy); 146 p = newType; 147 dex_ir_->types_indexes.MarkUsedIndex(index); 148 } 149 SLICER_CHECK(p != dummy); 150 return p; 151 } 152 153 // map a .dex index to corresponding .dex IR node 154 // (see the Reader::GetClass() comments) 155 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) { 156 SLICER_CHECK(index != dex::kNoIndex); 157 auto& p = dex_ir_->fields_map[index]; 158 auto dummy = reinterpret_cast<ir::FieldDecl*>(1); 159 if (p == nullptr) { 160 p = dummy; 161 auto newField = ParseFieldDecl(index); 162 SLICER_CHECK(p == dummy); 163 p = newField; 164 dex_ir_->fields_indexes.MarkUsedIndex(index); 165 } 166 SLICER_CHECK(p != dummy); 167 return p; 168 } 169 170 // map a .dex index to corresponding .dex IR node 171 // (see the Reader::GetClass() comments) 172 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) { 173 SLICER_CHECK(index != dex::kNoIndex); 174 auto& p = dex_ir_->methods_map[index]; 175 auto dummy = reinterpret_cast<ir::MethodDecl*>(1); 176 if (p == nullptr) { 177 p = dummy; 178 auto newMethod = ParseMethodDecl(index); 179 SLICER_CHECK(p == dummy); 180 p = newMethod; 181 dex_ir_->methods_indexes.MarkUsedIndex(index); 182 } 183 SLICER_CHECK(p != dummy); 184 return p; 185 } 186 187 // map a .dex index to corresponding .dex IR node 188 // (see the Reader::GetClass() comments) 189 ir::Proto* Reader::GetProto(dex::u4 index) { 190 SLICER_CHECK(index != dex::kNoIndex); 191 auto& p = dex_ir_->protos_map[index]; 192 auto dummy = reinterpret_cast<ir::Proto*>(1); 193 if (p == nullptr) { 194 p = dummy; 195 auto newProto = ParseProto(index); 196 SLICER_CHECK(p == dummy); 197 p = newProto; 198 dex_ir_->protos_indexes.MarkUsedIndex(index); 199 } 200 SLICER_CHECK(p != dummy); 201 return p; 202 } 203 204 // map a .dex index to corresponding .dex IR node 205 // (see the Reader::GetClass() comments) 206 ir::String* Reader::GetString(dex::u4 index) { 207 SLICER_CHECK(index != dex::kNoIndex); 208 auto& p = dex_ir_->strings_map[index]; 209 auto dummy = reinterpret_cast<ir::String*>(1); 210 if (p == nullptr) { 211 p = dummy; 212 auto newString = ParseString(index); 213 SLICER_CHECK(p == dummy); 214 p = newString; 215 dex_ir_->strings_indexes.MarkUsedIndex(index); 216 } 217 SLICER_CHECK(p != dummy); 218 return p; 219 } 220 221 ir::Class* Reader::ParseClass(dex::u4 index) { 222 auto& dex_class_def = ClassDefs()[index]; 223 auto ir_class = dex_ir_->Alloc<ir::Class>(); 224 225 ir_class->type = GetType(dex_class_def.class_idx); 226 assert(ir_class->type->class_def == nullptr); 227 ir_class->type->class_def = ir_class; 228 229 ir_class->access_flags = dex_class_def.access_flags; 230 ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off); 231 232 if (dex_class_def.superclass_idx != dex::kNoIndex) { 233 ir_class->super_class = GetType(dex_class_def.superclass_idx); 234 } 235 236 if (dex_class_def.source_file_idx != dex::kNoIndex) { 237 ir_class->source_file = GetString(dex_class_def.source_file_idx); 238 } 239 240 if (dex_class_def.class_data_off != 0) { 241 const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off); 242 243 dex::u4 static_fields_count = dex::ReadULeb128(&class_data); 244 dex::u4 instance_fields_count = dex::ReadULeb128(&class_data); 245 dex::u4 direct_methods_count = dex::ReadULeb128(&class_data); 246 dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data); 247 248 dex::u4 base_index = dex::kNoIndex; 249 for (dex::u4 i = 0; i < static_fields_count; ++i) { 250 auto field = ParseEncodedField(&class_data, &base_index); 251 ir_class->static_fields.push_back(field); 252 } 253 254 base_index = dex::kNoIndex; 255 for (dex::u4 i = 0; i < instance_fields_count; ++i) { 256 auto field = ParseEncodedField(&class_data, &base_index); 257 ir_class->instance_fields.push_back(field); 258 } 259 260 base_index = dex::kNoIndex; 261 for (dex::u4 i = 0; i < direct_methods_count; ++i) { 262 auto method = ParseEncodedMethod(&class_data, &base_index); 263 ir_class->direct_methods.push_back(method); 264 } 265 266 base_index = dex::kNoIndex; 267 for (dex::u4 i = 0; i < virtual_methods_count; ++i) { 268 auto method = ParseEncodedMethod(&class_data, &base_index); 269 ir_class->virtual_methods.push_back(method); 270 } 271 } 272 273 ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off); 274 ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off); 275 ir_class->orig_index = index; 276 277 return ir_class; 278 } 279 280 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) { 281 if (offset == 0) { 282 return nullptr; 283 } 284 285 SLICER_CHECK(offset % 4 == 0); 286 287 // first check if we already extracted the same "annotations_directory_item" 288 auto& ir_annotations = annotations_directories_[offset]; 289 if (ir_annotations == nullptr) { 290 ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>(); 291 292 auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset); 293 294 ir_annotations->class_annotation = 295 ExtractAnnotationSet(dex_annotations->class_annotations_off); 296 297 const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1); 298 299 for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) { 300 ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr)); 301 } 302 303 for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) { 304 ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr)); 305 } 306 307 for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) { 308 ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr)); 309 } 310 } 311 return ir_annotations; 312 } 313 314 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) { 315 SLICER_CHECK(offset != 0); 316 317 // first check if we already extracted the same "annotation_item" 318 auto& ir_annotation = annotations_[offset]; 319 if (ir_annotation == nullptr) { 320 auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset); 321 const dex::u1* ptr = dexAnnotationItem->annotation; 322 ir_annotation = ParseAnnotation(&ptr); 323 ir_annotation->visibility = dexAnnotationItem->visibility; 324 } 325 return ir_annotation; 326 } 327 328 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) { 329 if (offset == 0) { 330 return nullptr; 331 } 332 333 SLICER_CHECK(offset % 4 == 0); 334 335 // first check if we already extracted the same "annotation_set_item" 336 auto& ir_annotation_set = annotation_sets_[offset]; 337 if (ir_annotation_set == nullptr) { 338 ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>(); 339 340 auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset); 341 for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) { 342 auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]); 343 assert(ir_annotation != nullptr); 344 ir_annotation_set->annotations.push_back(ir_annotation); 345 } 346 } 347 return ir_annotation_set; 348 } 349 350 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) { 351 SLICER_CHECK(offset % 4 == 0); 352 353 auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset); 354 auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>(); 355 356 for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) { 357 dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off; 358 if (entry_offset != 0) { 359 auto ir_annotation_set = ExtractAnnotationSet(entry_offset); 360 SLICER_CHECK(ir_annotation_set != nullptr); 361 ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set); 362 } 363 } 364 365 return ir_annotation_set_ref_list; 366 } 367 368 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) { 369 auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr); 370 auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>(); 371 372 ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx); 373 374 ir_field_annotation->annotations = 375 ExtractAnnotationSet(dex_field_annotation->annotations_off); 376 SLICER_CHECK(ir_field_annotation->annotations != nullptr); 377 378 *pptr += sizeof(dex::FieldAnnotationsItem); 379 return ir_field_annotation; 380 } 381 382 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) { 383 auto dex_method_annotation = 384 reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr); 385 auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>(); 386 387 ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx); 388 389 ir_method_annotation->annotations = 390 ExtractAnnotationSet(dex_method_annotation->annotations_off); 391 SLICER_CHECK(ir_method_annotation->annotations != nullptr); 392 393 *pptr += sizeof(dex::MethodAnnotationsItem); 394 return ir_method_annotation; 395 } 396 397 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) { 398 auto dex_param_annotation = 399 reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr); 400 auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>(); 401 402 ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx); 403 404 ir_param_annotation->annotations = 405 ExtractAnnotationSetRefList(dex_param_annotation->annotations_off); 406 SLICER_CHECK(ir_param_annotation->annotations != nullptr); 407 408 *pptr += sizeof(dex::ParameterAnnotationsItem); 409 return ir_param_annotation; 410 } 411 412 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) { 413 auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>(); 414 415 auto field_index = dex::ReadULeb128(pptr); 416 SLICER_CHECK(field_index != dex::kNoIndex); 417 if (*base_index != dex::kNoIndex) { 418 SLICER_CHECK(field_index != 0); 419 field_index += *base_index; 420 } 421 *base_index = field_index; 422 423 ir_encoded_field->decl = GetFieldDecl(field_index); 424 ir_encoded_field->access_flags = dex::ReadULeb128(pptr); 425 426 return ir_encoded_field; 427 } 428 429 // Parse an encoded variable-length integer value 430 // (sign-extend signed types, zero-extend unsigned types) 431 template <class T> 432 static T ParseIntValue(const dex::u1** pptr, size_t size) { 433 static_assert(std::is_integral<T>::value, "must be an integral type"); 434 435 SLICER_CHECK(size > 0); 436 SLICER_CHECK(size <= sizeof(T)); 437 438 T value = 0; 439 for (int i = 0; i < size; ++i) { 440 value |= T(*(*pptr)++) << (i * 8); 441 } 442 443 // sign-extend? 444 if (std::is_signed<T>::value) { 445 size_t shift = (sizeof(T) - size) * 8; 446 value = T(value << shift) >> shift; 447 } 448 449 return value; 450 } 451 452 // Parse an encoded variable-length floating point value 453 // (zero-extend to the right) 454 template <class T> 455 static T ParseFloatValue(const dex::u1** pptr, size_t size) { 456 SLICER_CHECK(size > 0); 457 SLICER_CHECK(size <= sizeof(T)); 458 459 T value = 0; 460 int start_byte = sizeof(T) - size; 461 for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0; 462 --size) { 463 *p++ = *(*pptr)++; 464 } 465 return value; 466 } 467 468 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) { 469 auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>(); 470 471 SLICER_EXTRA(auto base_ptr = *pptr); 472 473 dex::u1 header = *(*pptr)++; 474 dex::u1 type = header & dex::kEncodedValueTypeMask; 475 dex::u1 arg = header >> dex::kEncodedValueArgShift; 476 477 ir_encoded_value->type = type; 478 479 switch (type) { 480 case dex::kEncodedByte: 481 ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1); 482 break; 483 484 case dex::kEncodedShort: 485 ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1); 486 break; 487 488 case dex::kEncodedChar: 489 ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1); 490 break; 491 492 case dex::kEncodedInt: 493 ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1); 494 break; 495 496 case dex::kEncodedLong: 497 ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1); 498 break; 499 500 case dex::kEncodedFloat: 501 ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1); 502 break; 503 504 case dex::kEncodedDouble: 505 ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1); 506 break; 507 508 case dex::kEncodedString: { 509 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1); 510 ir_encoded_value->u.string_value = GetString(index); 511 } break; 512 513 case dex::kEncodedType: { 514 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1); 515 ir_encoded_value->u.type_value = GetType(index); 516 } break; 517 518 case dex::kEncodedField: { 519 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1); 520 ir_encoded_value->u.field_value = GetFieldDecl(index); 521 } break; 522 523 case dex::kEncodedMethod: { 524 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1); 525 ir_encoded_value->u.method_value = GetMethodDecl(index); 526 } break; 527 528 case dex::kEncodedEnum: { 529 dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1); 530 ir_encoded_value->u.enum_value = GetFieldDecl(index); 531 } break; 532 533 case dex::kEncodedArray: 534 SLICER_CHECK(arg == 0); 535 ir_encoded_value->u.array_value = ParseEncodedArray(pptr); 536 break; 537 538 case dex::kEncodedAnnotation: 539 SLICER_CHECK(arg == 0); 540 ir_encoded_value->u.annotation_value = ParseAnnotation(pptr); 541 break; 542 543 case dex::kEncodedNull: 544 SLICER_CHECK(arg == 0); 545 break; 546 547 case dex::kEncodedBoolean: 548 SLICER_CHECK(arg < 2); 549 ir_encoded_value->u.bool_value = (arg == 1); 550 break; 551 552 default: 553 SLICER_CHECK(!"unexpected value type"); 554 } 555 556 SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr)); 557 558 return ir_encoded_value; 559 } 560 561 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) { 562 auto ir_annotation = dex_ir_->Alloc<ir::Annotation>(); 563 564 dex::u4 type_index = dex::ReadULeb128(pptr); 565 dex::u4 elements_count = dex::ReadULeb128(pptr); 566 567 ir_annotation->type = GetType(type_index); 568 ir_annotation->visibility = dex::kVisibilityEncoded; 569 570 for (dex::u4 i = 0; i < elements_count; ++i) { 571 auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>(); 572 573 ir_element->name = GetString(dex::ReadULeb128(pptr)); 574 ir_element->value = ParseEncodedValue(pptr); 575 576 ir_annotation->elements.push_back(ir_element); 577 } 578 579 return ir_annotation; 580 } 581 582 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) { 583 auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>(); 584 585 dex::u4 count = dex::ReadULeb128(pptr); 586 for (dex::u4 i = 0; i < count; ++i) { 587 ir_encoded_array->values.push_back(ParseEncodedValue(pptr)); 588 } 589 590 return ir_encoded_array; 591 } 592 593 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) { 594 if (offset == 0) { 595 return nullptr; 596 } 597 598 // first check if we already extracted the same "annotation_item" 599 auto& ir_encoded_array = encoded_arrays_[offset]; 600 if (ir_encoded_array == nullptr) { 601 auto ptr = dataPtr<dex::u1>(offset); 602 ir_encoded_array = ParseEncodedArray(&ptr); 603 } 604 return ir_encoded_array; 605 } 606 607 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) { 608 if (offset == 0) { 609 return nullptr; 610 } 611 612 auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>(); 613 const dex::u1* ptr = dataPtr<dex::u1>(offset); 614 615 ir_debug_info->line_start = dex::ReadULeb128(&ptr); 616 617 // TODO: implicit this param for non-static methods? 618 dex::u4 param_count = dex::ReadULeb128(&ptr); 619 for (dex::u4 i = 0; i < param_count; ++i) { 620 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1; 621 auto ir_string = 622 (name_index == dex::kNoIndex) ? nullptr : GetString(name_index); 623 ir_debug_info->param_names.push_back(ir_string); 624 } 625 626 // parse the debug info opcodes and note the 627 // references to strings and types (to make sure the IR 628 // is the full closure of all referenced items) 629 // 630 // TODO: design a generic debug info iterator? 631 // 632 auto base_ptr = ptr; 633 dex::u1 opcode = 0; 634 while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) { 635 switch (opcode) { 636 case dex::DBG_ADVANCE_PC: 637 // addr_diff 638 dex::ReadULeb128(&ptr); 639 break; 640 641 case dex::DBG_ADVANCE_LINE: 642 // line_diff 643 dex::ReadSLeb128(&ptr); 644 break; 645 646 case dex::DBG_START_LOCAL: { 647 // register_num 648 dex::ReadULeb128(&ptr); 649 650 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1; 651 if (name_index != dex::kNoIndex) { 652 GetString(name_index); 653 } 654 655 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1; 656 if (type_index != dex::kNoIndex) { 657 GetType(type_index); 658 } 659 } break; 660 661 case dex::DBG_START_LOCAL_EXTENDED: { 662 // register_num 663 dex::ReadULeb128(&ptr); 664 665 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1; 666 if (name_index != dex::kNoIndex) { 667 GetString(name_index); 668 } 669 670 dex::u4 type_index = dex::ReadULeb128(&ptr) - 1; 671 if (type_index != dex::kNoIndex) { 672 GetType(type_index); 673 } 674 675 dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1; 676 if (sig_index != dex::kNoIndex) { 677 GetString(sig_index); 678 } 679 } break; 680 681 case dex::DBG_END_LOCAL: 682 case dex::DBG_RESTART_LOCAL: 683 // register_num 684 dex::ReadULeb128(&ptr); 685 break; 686 687 case dex::DBG_SET_FILE: { 688 dex::u4 name_index = dex::ReadULeb128(&ptr) - 1; 689 if (name_index != dex::kNoIndex) { 690 GetString(name_index); 691 } 692 } break; 693 } 694 } 695 696 ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr); 697 698 return ir_debug_info; 699 } 700 701 ir::Code* Reader::ExtractCode(dex::u4 offset) { 702 if (offset == 0) { 703 return nullptr; 704 } 705 706 SLICER_CHECK(offset % 4 == 0); 707 708 auto dex_code = dataPtr<dex::Code>(offset); 709 auto ir_code = dex_ir_->Alloc<ir::Code>(); 710 711 ir_code->registers = dex_code->registers_size; 712 ir_code->ins_count = dex_code->ins_size; 713 ir_code->outs_count = dex_code->outs_size; 714 715 // instructions array 716 ir_code->instructions = 717 slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size); 718 719 // parse the instructions to discover references to other 720 // IR nodes (see debug info stream parsing too) 721 ParseInstructions(ir_code->instructions); 722 723 // try blocks & handlers 724 // 725 // TODO: a generic try/catch blocks iterator? 726 // 727 if (dex_code->tries_size != 0) { 728 dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2; 729 auto tries = 730 reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count); 731 auto handlers_list = 732 reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size); 733 734 ir_code->try_blocks = 735 slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size); 736 737 // parse the handlers list (and discover embedded references) 738 auto ptr = handlers_list; 739 740 dex::u4 handlers_count = dex::ReadULeb128(&ptr); 741 SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size); 742 743 for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) { 744 int catch_count = dex::ReadSLeb128(&ptr); 745 746 for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) { 747 dex::u4 type_index = dex::ReadULeb128(&ptr); 748 GetType(type_index); 749 750 // address 751 dex::ReadULeb128(&ptr); 752 } 753 754 if (catch_count < 1) { 755 // catch_all_addr 756 dex::ReadULeb128(&ptr); 757 } 758 } 759 760 ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list); 761 } 762 763 ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off); 764 765 return ir_code; 766 } 767 768 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) { 769 auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>(); 770 771 auto method_index = dex::ReadULeb128(pptr); 772 SLICER_CHECK(method_index != dex::kNoIndex); 773 if (*base_index != dex::kNoIndex) { 774 SLICER_CHECK(method_index != 0); 775 method_index += *base_index; 776 } 777 *base_index = method_index; 778 779 ir_encoded_method->decl = GetMethodDecl(method_index); 780 ir_encoded_method->access_flags = dex::ReadULeb128(pptr); 781 782 dex::u4 code_offset = dex::ReadULeb128(pptr); 783 ir_encoded_method->code = ExtractCode(code_offset); 784 785 // update the methods lookup table 786 dex_ir_->methods_lookup.Insert(ir_encoded_method); 787 788 return ir_encoded_method; 789 } 790 791 ir::Type* Reader::ParseType(dex::u4 index) { 792 auto& dex_type = TypeIds()[index]; 793 auto ir_type = dex_ir_->Alloc<ir::Type>(); 794 795 ir_type->descriptor = GetString(dex_type.descriptor_idx); 796 ir_type->orig_index = index; 797 798 return ir_type; 799 } 800 801 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) { 802 auto& dex_field = FieldIds()[index]; 803 auto ir_field = dex_ir_->Alloc<ir::FieldDecl>(); 804 805 ir_field->name = GetString(dex_field.name_idx); 806 ir_field->type = GetType(dex_field.type_idx); 807 ir_field->parent = GetType(dex_field.class_idx); 808 ir_field->orig_index = index; 809 810 return ir_field; 811 } 812 813 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) { 814 auto& dex_method = MethodIds()[index]; 815 auto ir_method = dex_ir_->Alloc<ir::MethodDecl>(); 816 817 ir_method->name = GetString(dex_method.name_idx); 818 ir_method->prototype = GetProto(dex_method.proto_idx); 819 ir_method->parent = GetType(dex_method.class_idx); 820 ir_method->orig_index = index; 821 822 return ir_method; 823 } 824 825 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) { 826 if (offset == 0) { 827 return nullptr; 828 } 829 830 // first check to see if we already extracted the same "type_list" 831 auto& ir_type_list = type_lists_[offset]; 832 if (ir_type_list == nullptr) { 833 ir_type_list = dex_ir_->Alloc<ir::TypeList>(); 834 835 auto dex_type_list = dataPtr<dex::TypeList>(offset); 836 SLICER_WEAK_CHECK(dex_type_list->size > 0); 837 838 for (dex::u4 i = 0; i < dex_type_list->size; ++i) { 839 ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx)); 840 } 841 } 842 843 return ir_type_list; 844 } 845 846 ir::Proto* Reader::ParseProto(dex::u4 index) { 847 auto& dex_proto = ProtoIds()[index]; 848 auto ir_proto = dex_ir_->Alloc<ir::Proto>(); 849 850 ir_proto->shorty = GetString(dex_proto.shorty_idx); 851 ir_proto->return_type = GetType(dex_proto.return_type_idx); 852 ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off); 853 ir_proto->orig_index = index; 854 855 // update the prototypes lookup table 856 dex_ir_->prototypes_lookup.Insert(ir_proto); 857 858 return ir_proto; 859 } 860 861 ir::String* Reader::ParseString(dex::u4 index) { 862 auto ir_string = dex_ir_->Alloc<ir::String>(); 863 864 auto data = GetStringData(index); 865 auto cstr = data; 866 dex::ReadULeb128(&cstr); 867 size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1; 868 869 ir_string->data = slicer::MemView(data, size); 870 ir_string->orig_index = index; 871 872 // update the strings lookup table 873 dex_ir_->strings_lookup.Insert(ir_string); 874 875 return ir_string; 876 } 877 878 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) { 879 const dex::u2* ptr = code.begin(); 880 while (ptr < code.end()) { 881 auto dex_instr = dex::DecodeInstruction(ptr); 882 883 dex::u4 index = dex::kNoIndex; 884 switch (dex::GetFormatFromOpcode(dex_instr.opcode)) { 885 case dex::kFmt20bc: 886 case dex::kFmt21c: 887 case dex::kFmt31c: 888 case dex::kFmt35c: 889 case dex::kFmt3rc: 890 index = dex_instr.vB; 891 break; 892 893 case dex::kFmt22c: 894 index = dex_instr.vC; 895 break; 896 897 default: 898 break; 899 } 900 901 switch (GetIndexTypeFromOpcode(dex_instr.opcode)) { 902 case dex::kIndexStringRef: 903 GetString(index); 904 break; 905 906 case dex::kIndexTypeRef: 907 GetType(index); 908 break; 909 910 case dex::kIndexFieldRef: 911 GetFieldDecl(index); 912 break; 913 914 case dex::kIndexMethodRef: 915 GetMethodDecl(index); 916 break; 917 918 default: 919 break; 920 } 921 922 auto isize = dex::GetWidthFromBytecode(ptr); 923 SLICER_CHECK(isize > 0); 924 ptr += isize; 925 } 926 SLICER_CHECK(ptr == code.end()); 927 } 928 929 // Basic .dex header structural checks 930 void Reader::ValidateHeader() { 931 SLICER_CHECK(size_ > sizeof(dex::Header)); 932 933 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an 934 // estimate. b/72402467 935 SLICER_CHECK(header_->file_size <= size_); 936 SLICER_CHECK(header_->header_size == sizeof(dex::Header)); 937 SLICER_CHECK(header_->endian_tag == dex::kEndianConstant); 938 SLICER_CHECK(header_->data_size % 4 == 0); 939 940 // Known issue: The fields might be slighly corrupted b/65452964 941 // SLICER_CHECK(header_->data_off + header_->data_size <= size_); 942 943 SLICER_CHECK(header_->string_ids_off % 4 == 0); 944 SLICER_CHECK(header_->type_ids_size < 65536); 945 SLICER_CHECK(header_->type_ids_off % 4 == 0); 946 SLICER_CHECK(header_->proto_ids_size < 65536); 947 SLICER_CHECK(header_->proto_ids_off % 4 == 0); 948 SLICER_CHECK(header_->field_ids_off % 4 == 0); 949 SLICER_CHECK(header_->method_ids_off % 4 == 0); 950 SLICER_CHECK(header_->class_defs_off % 4 == 0); 951 SLICER_CHECK(header_->map_off >= header_->data_off && header_->map_off < size_); 952 SLICER_CHECK(header_->link_size == 0); 953 SLICER_CHECK(header_->link_off == 0); 954 SLICER_CHECK(header_->data_off % 4 == 0); 955 SLICER_CHECK(header_->map_off % 4 == 0); 956 957 // we seem to have .dex files with extra bytes at the end ... 958 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an 959 // estimate. b/72402467 960 SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_); 961 962 // but we should still have the whole data section 963 964 // Known issue: The fields might be slighly corrupted b/65452964 965 // Known issue: For performance reasons the initial size_ passed to jvmti events might be an 966 // estimate. b/72402467 967 // SLICER_CHECK(header_->data_off + header_->data_size <= size_); 968 969 // validate the map 970 // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size]) 971 auto map_list = ptr<dex::MapList>(header_->map_off); 972 SLICER_CHECK(map_list->size > 0); 973 auto map_section_size = 974 sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size; 975 SLICER_CHECK(header_->map_off + map_section_size <= size_); 976 } 977 978 } // namespace dex 979