Home | History | Annotate | Download | only in slicer
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "slicer/reader.h"
     18 #include "slicer/dex_bytecode.h"
     19 #include "slicer/chronometer.h"
     20 #include "slicer/dex_leb128.h"
     21 
     22 #include <assert.h>
     23 #include <string.h>
     24 #include <type_traits>
     25 #include <cstdlib>
     26 
     27 namespace dex {
     28 
     29 Reader::Reader(const dex::u1* image, size_t size) : image_(image), size_(size) {
     30   // init the header reference
     31   header_ = ptr<dex::Header>(0);
     32   ValidateHeader();
     33 
     34   // start with an "empty" .dex IR
     35   dex_ir_ = std::make_shared<ir::DexFile>();
     36   dex_ir_->magic = slicer::MemView(header_, sizeof(dex::Header::magic));
     37 }
     38 
     39 slicer::ArrayView<const dex::ClassDef> Reader::ClassDefs() const {
     40   return section<dex::ClassDef>(header_->class_defs_off,
     41                                 header_->class_defs_size);
     42 }
     43 
     44 slicer::ArrayView<const dex::StringId> Reader::StringIds() const {
     45   return section<dex::StringId>(header_->string_ids_off,
     46                                 header_->string_ids_size);
     47 }
     48 
     49 slicer::ArrayView<const dex::TypeId> Reader::TypeIds() const {
     50   return section<dex::TypeId>(header_->type_ids_off,
     51                               header_->type_ids_size);
     52 }
     53 
     54 slicer::ArrayView<const dex::FieldId> Reader::FieldIds() const {
     55   return section<dex::FieldId>(header_->field_ids_off,
     56                                header_->field_ids_size);
     57 }
     58 
     59 slicer::ArrayView<const dex::MethodId> Reader::MethodIds() const {
     60   return section<dex::MethodId>(header_->method_ids_off,
     61                                 header_->method_ids_size);
     62 }
     63 
     64 slicer::ArrayView<const dex::ProtoId> Reader::ProtoIds() const {
     65   return section<dex::ProtoId>(header_->proto_ids_off,
     66                                header_->proto_ids_size);
     67 }
     68 
     69 const dex::MapList* Reader::DexMapList() const {
     70   return dataPtr<dex::MapList>(header_->map_off);
     71 }
     72 
     73 const char* Reader::GetStringMUTF8(dex::u4 index) const {
     74   if (index == dex::kNoIndex) {
     75     return "<no_string>";
     76   }
     77   const dex::u1* strData = GetStringData(index);
     78   dex::ReadULeb128(&strData);
     79   return reinterpret_cast<const char*>(strData);
     80 }
     81 
     82 void Reader::CreateFullIr() {
     83   size_t classCount = ClassDefs().size();
     84   for (size_t i = 0; i < classCount; ++i) {
     85     CreateClassIr(i);
     86   }
     87 }
     88 
     89 void Reader::CreateClassIr(dex::u4 index) {
     90   auto ir_class = GetClass(index);
     91   SLICER_CHECK(ir_class != nullptr);
     92 }
     93 
     94 // Returns the index of the class with the specified
     95 // descriptor, or kNoIndex if not found
     96 dex::u4 Reader::FindClassIndex(const char* class_descriptor) const {
     97   auto classes = ClassDefs();
     98   auto types = TypeIds();
     99   for (dex::u4 i = 0; i < classes.size(); ++i) {
    100     auto typeId = types[classes[i].class_idx];
    101     const char* descriptor = GetStringMUTF8(typeId.descriptor_idx);
    102     if (strcmp(class_descriptor, descriptor) == 0) {
    103       return i;
    104     }
    105   }
    106   return dex::kNoIndex;
    107 }
    108 
    109 // map a .dex index to corresponding .dex IR node
    110 //
    111 // NOTES:
    112 //  1. the mapping beween an index and the indexed
    113 //     .dex IR nodes is 1:1
    114 //  2. we do a single index lookup for both existing
    115 //     nodes as well as new nodes
    116 //  3. dummy is an invalid, but non-null pointer value
    117 //     used to check that the mapping loookup/update is atomic
    118 //  4. there should be no recursion with the same index
    119 //     (we use the dummy value to guard against this too)
    120 //
    121 ir::Class* Reader::GetClass(dex::u4 index) {
    122   SLICER_CHECK(index != dex::kNoIndex);
    123   auto& p = dex_ir_->classes_map[index];
    124   auto dummy = reinterpret_cast<ir::Class*>(1);
    125   if (p == nullptr) {
    126     p = dummy;
    127     auto newClass = ParseClass(index);
    128     SLICER_CHECK(p == dummy);
    129     p = newClass;
    130     dex_ir_->classes_indexes.MarkUsedIndex(index);
    131   }
    132   SLICER_CHECK(p != dummy);
    133   return p;
    134 }
    135 
    136 // map a .dex index to corresponding .dex IR node
    137 // (see the Reader::GetClass() comments)
    138 ir::Type* Reader::GetType(dex::u4 index) {
    139   SLICER_CHECK(index != dex::kNoIndex);
    140   auto& p = dex_ir_->types_map[index];
    141   auto dummy = reinterpret_cast<ir::Type*>(1);
    142   if (p == nullptr) {
    143     p = dummy;
    144     auto newType = ParseType(index);
    145     SLICER_CHECK(p == dummy);
    146     p = newType;
    147     dex_ir_->types_indexes.MarkUsedIndex(index);
    148   }
    149   SLICER_CHECK(p != dummy);
    150   return p;
    151 }
    152 
    153 // map a .dex index to corresponding .dex IR node
    154 // (see the Reader::GetClass() comments)
    155 ir::FieldDecl* Reader::GetFieldDecl(dex::u4 index) {
    156   SLICER_CHECK(index != dex::kNoIndex);
    157   auto& p = dex_ir_->fields_map[index];
    158   auto dummy = reinterpret_cast<ir::FieldDecl*>(1);
    159   if (p == nullptr) {
    160     p = dummy;
    161     auto newField = ParseFieldDecl(index);
    162     SLICER_CHECK(p == dummy);
    163     p = newField;
    164     dex_ir_->fields_indexes.MarkUsedIndex(index);
    165   }
    166   SLICER_CHECK(p != dummy);
    167   return p;
    168 }
    169 
    170 // map a .dex index to corresponding .dex IR node
    171 // (see the Reader::GetClass() comments)
    172 ir::MethodDecl* Reader::GetMethodDecl(dex::u4 index) {
    173   SLICER_CHECK(index != dex::kNoIndex);
    174   auto& p = dex_ir_->methods_map[index];
    175   auto dummy = reinterpret_cast<ir::MethodDecl*>(1);
    176   if (p == nullptr) {
    177     p = dummy;
    178     auto newMethod = ParseMethodDecl(index);
    179     SLICER_CHECK(p == dummy);
    180     p = newMethod;
    181     dex_ir_->methods_indexes.MarkUsedIndex(index);
    182   }
    183   SLICER_CHECK(p != dummy);
    184   return p;
    185 }
    186 
    187 // map a .dex index to corresponding .dex IR node
    188 // (see the Reader::GetClass() comments)
    189 ir::Proto* Reader::GetProto(dex::u4 index) {
    190   SLICER_CHECK(index != dex::kNoIndex);
    191   auto& p = dex_ir_->protos_map[index];
    192   auto dummy = reinterpret_cast<ir::Proto*>(1);
    193   if (p == nullptr) {
    194     p = dummy;
    195     auto newProto = ParseProto(index);
    196     SLICER_CHECK(p == dummy);
    197     p = newProto;
    198     dex_ir_->protos_indexes.MarkUsedIndex(index);
    199   }
    200   SLICER_CHECK(p != dummy);
    201   return p;
    202 }
    203 
    204 // map a .dex index to corresponding .dex IR node
    205 // (see the Reader::GetClass() comments)
    206 ir::String* Reader::GetString(dex::u4 index) {
    207   SLICER_CHECK(index != dex::kNoIndex);
    208   auto& p = dex_ir_->strings_map[index];
    209   auto dummy = reinterpret_cast<ir::String*>(1);
    210   if (p == nullptr) {
    211     p = dummy;
    212     auto newString = ParseString(index);
    213     SLICER_CHECK(p == dummy);
    214     p = newString;
    215     dex_ir_->strings_indexes.MarkUsedIndex(index);
    216   }
    217   SLICER_CHECK(p != dummy);
    218   return p;
    219 }
    220 
    221 ir::Class* Reader::ParseClass(dex::u4 index) {
    222   auto& dex_class_def = ClassDefs()[index];
    223   auto ir_class = dex_ir_->Alloc<ir::Class>();
    224 
    225   ir_class->type = GetType(dex_class_def.class_idx);
    226   assert(ir_class->type->class_def == nullptr);
    227   ir_class->type->class_def = ir_class;
    228 
    229   ir_class->access_flags = dex_class_def.access_flags;
    230   ir_class->interfaces = ExtractTypeList(dex_class_def.interfaces_off);
    231 
    232   if (dex_class_def.superclass_idx != dex::kNoIndex) {
    233     ir_class->super_class = GetType(dex_class_def.superclass_idx);
    234   }
    235 
    236   if (dex_class_def.source_file_idx != dex::kNoIndex) {
    237     ir_class->source_file = GetString(dex_class_def.source_file_idx);
    238   }
    239 
    240   if (dex_class_def.class_data_off != 0) {
    241     const dex::u1* class_data = dataPtr<dex::u1>(dex_class_def.class_data_off);
    242 
    243     dex::u4 static_fields_count = dex::ReadULeb128(&class_data);
    244     dex::u4 instance_fields_count = dex::ReadULeb128(&class_data);
    245     dex::u4 direct_methods_count = dex::ReadULeb128(&class_data);
    246     dex::u4 virtual_methods_count = dex::ReadULeb128(&class_data);
    247 
    248     dex::u4 base_index = dex::kNoIndex;
    249     for (dex::u4 i = 0; i < static_fields_count; ++i) {
    250       auto field = ParseEncodedField(&class_data, &base_index);
    251       ir_class->static_fields.push_back(field);
    252     }
    253 
    254     base_index = dex::kNoIndex;
    255     for (dex::u4 i = 0; i < instance_fields_count; ++i) {
    256       auto field = ParseEncodedField(&class_data, &base_index);
    257       ir_class->instance_fields.push_back(field);
    258     }
    259 
    260     base_index = dex::kNoIndex;
    261     for (dex::u4 i = 0; i < direct_methods_count; ++i) {
    262       auto method = ParseEncodedMethod(&class_data, &base_index);
    263       ir_class->direct_methods.push_back(method);
    264     }
    265 
    266     base_index = dex::kNoIndex;
    267     for (dex::u4 i = 0; i < virtual_methods_count; ++i) {
    268       auto method = ParseEncodedMethod(&class_data, &base_index);
    269       ir_class->virtual_methods.push_back(method);
    270     }
    271   }
    272 
    273   ir_class->static_init = ExtractEncodedArray(dex_class_def.static_values_off);
    274   ir_class->annotations = ExtractAnnotations(dex_class_def.annotations_off);
    275   ir_class->orig_index = index;
    276 
    277   return ir_class;
    278 }
    279 
    280 ir::AnnotationsDirectory* Reader::ExtractAnnotations(dex::u4 offset) {
    281   if (offset == 0) {
    282     return nullptr;
    283   }
    284 
    285   SLICER_CHECK(offset % 4 == 0);
    286 
    287   // first check if we already extracted the same "annotations_directory_item"
    288   auto& ir_annotations = annotations_directories_[offset];
    289   if (ir_annotations == nullptr) {
    290     ir_annotations = dex_ir_->Alloc<ir::AnnotationsDirectory>();
    291 
    292     auto dex_annotations = dataPtr<dex::AnnotationsDirectoryItem>(offset);
    293 
    294     ir_annotations->class_annotation =
    295         ExtractAnnotationSet(dex_annotations->class_annotations_off);
    296 
    297     const dex::u1* ptr = reinterpret_cast<const dex::u1*>(dex_annotations + 1);
    298 
    299     for (dex::u4 i = 0; i < dex_annotations->fields_size; ++i) {
    300       ir_annotations->field_annotations.push_back(ParseFieldAnnotation(&ptr));
    301     }
    302 
    303     for (dex::u4 i = 0; i < dex_annotations->methods_size; ++i) {
    304       ir_annotations->method_annotations.push_back(ParseMethodAnnotation(&ptr));
    305     }
    306 
    307     for (dex::u4 i = 0; i < dex_annotations->parameters_size; ++i) {
    308       ir_annotations->param_annotations.push_back(ParseParamAnnotation(&ptr));
    309     }
    310   }
    311   return ir_annotations;
    312 }
    313 
    314 ir::Annotation* Reader::ExtractAnnotationItem(dex::u4 offset) {
    315   SLICER_CHECK(offset != 0);
    316 
    317   // first check if we already extracted the same "annotation_item"
    318   auto& ir_annotation = annotations_[offset];
    319   if (ir_annotation == nullptr) {
    320     auto dexAnnotationItem = dataPtr<dex::AnnotationItem>(offset);
    321     const dex::u1* ptr = dexAnnotationItem->annotation;
    322     ir_annotation = ParseAnnotation(&ptr);
    323     ir_annotation->visibility = dexAnnotationItem->visibility;
    324   }
    325   return ir_annotation;
    326 }
    327 
    328 ir::AnnotationSet* Reader::ExtractAnnotationSet(dex::u4 offset) {
    329   if (offset == 0) {
    330     return nullptr;
    331   }
    332 
    333   SLICER_CHECK(offset % 4 == 0);
    334 
    335   // first check if we already extracted the same "annotation_set_item"
    336   auto& ir_annotation_set = annotation_sets_[offset];
    337   if (ir_annotation_set == nullptr) {
    338     ir_annotation_set = dex_ir_->Alloc<ir::AnnotationSet>();
    339 
    340     auto dex_annotation_set = dataPtr<dex::AnnotationSetItem>(offset);
    341     for (dex::u4 i = 0; i < dex_annotation_set->size; ++i) {
    342       auto ir_annotation = ExtractAnnotationItem(dex_annotation_set->entries[i]);
    343       assert(ir_annotation != nullptr);
    344       ir_annotation_set->annotations.push_back(ir_annotation);
    345     }
    346   }
    347   return ir_annotation_set;
    348 }
    349 
    350 ir::AnnotationSetRefList* Reader::ExtractAnnotationSetRefList(dex::u4 offset) {
    351   SLICER_CHECK(offset % 4 == 0);
    352 
    353   auto dex_annotation_set_ref_list = dataPtr<dex::AnnotationSetRefList>(offset);
    354   auto ir_annotation_set_ref_list = dex_ir_->Alloc<ir::AnnotationSetRefList>();
    355 
    356   for (dex::u4 i = 0; i < dex_annotation_set_ref_list->size; ++i) {
    357     dex::u4 entry_offset = dex_annotation_set_ref_list->list[i].annotations_off;
    358     if (entry_offset != 0) {
    359       auto ir_annotation_set = ExtractAnnotationSet(entry_offset);
    360       SLICER_CHECK(ir_annotation_set != nullptr);
    361       ir_annotation_set_ref_list->annotations.push_back(ir_annotation_set);
    362     }
    363   }
    364 
    365   return ir_annotation_set_ref_list;
    366 }
    367 
    368 ir::FieldAnnotation* Reader::ParseFieldAnnotation(const dex::u1** pptr) {
    369   auto dex_field_annotation = reinterpret_cast<const dex::FieldAnnotationsItem*>(*pptr);
    370   auto ir_field_annotation = dex_ir_->Alloc<ir::FieldAnnotation>();
    371 
    372   ir_field_annotation->field_decl = GetFieldDecl(dex_field_annotation->field_idx);
    373 
    374   ir_field_annotation->annotations =
    375       ExtractAnnotationSet(dex_field_annotation->annotations_off);
    376   SLICER_CHECK(ir_field_annotation->annotations != nullptr);
    377 
    378   *pptr += sizeof(dex::FieldAnnotationsItem);
    379   return ir_field_annotation;
    380 }
    381 
    382 ir::MethodAnnotation* Reader::ParseMethodAnnotation(const dex::u1** pptr) {
    383   auto dex_method_annotation =
    384       reinterpret_cast<const dex::MethodAnnotationsItem*>(*pptr);
    385   auto ir_method_annotation = dex_ir_->Alloc<ir::MethodAnnotation>();
    386 
    387   ir_method_annotation->method_decl = GetMethodDecl(dex_method_annotation->method_idx);
    388 
    389   ir_method_annotation->annotations =
    390       ExtractAnnotationSet(dex_method_annotation->annotations_off);
    391   SLICER_CHECK(ir_method_annotation->annotations != nullptr);
    392 
    393   *pptr += sizeof(dex::MethodAnnotationsItem);
    394   return ir_method_annotation;
    395 }
    396 
    397 ir::ParamAnnotation* Reader::ParseParamAnnotation(const dex::u1** pptr) {
    398   auto dex_param_annotation =
    399       reinterpret_cast<const dex::ParameterAnnotationsItem*>(*pptr);
    400   auto ir_param_annotation = dex_ir_->Alloc<ir::ParamAnnotation>();
    401 
    402   ir_param_annotation->method_decl = GetMethodDecl(dex_param_annotation->method_idx);
    403 
    404   ir_param_annotation->annotations =
    405       ExtractAnnotationSetRefList(dex_param_annotation->annotations_off);
    406   SLICER_CHECK(ir_param_annotation->annotations != nullptr);
    407 
    408   *pptr += sizeof(dex::ParameterAnnotationsItem);
    409   return ir_param_annotation;
    410 }
    411 
    412 ir::EncodedField* Reader::ParseEncodedField(const dex::u1** pptr, dex::u4* base_index) {
    413   auto ir_encoded_field = dex_ir_->Alloc<ir::EncodedField>();
    414 
    415   auto field_index = dex::ReadULeb128(pptr);
    416   SLICER_CHECK(field_index != dex::kNoIndex);
    417   if (*base_index != dex::kNoIndex) {
    418     SLICER_CHECK(field_index != 0);
    419     field_index += *base_index;
    420   }
    421   *base_index = field_index;
    422 
    423   ir_encoded_field->decl = GetFieldDecl(field_index);
    424   ir_encoded_field->access_flags = dex::ReadULeb128(pptr);
    425 
    426   return ir_encoded_field;
    427 }
    428 
    429 // Parse an encoded variable-length integer value
    430 // (sign-extend signed types, zero-extend unsigned types)
    431 template <class T>
    432 static T ParseIntValue(const dex::u1** pptr, size_t size) {
    433   static_assert(std::is_integral<T>::value, "must be an integral type");
    434 
    435   SLICER_CHECK(size > 0);
    436   SLICER_CHECK(size <= sizeof(T));
    437 
    438   T value = 0;
    439   for (int i = 0; i < size; ++i) {
    440     value |= T(*(*pptr)++) << (i * 8);
    441   }
    442 
    443   // sign-extend?
    444   if (std::is_signed<T>::value) {
    445     size_t shift = (sizeof(T) - size) * 8;
    446     value = T(value << shift) >> shift;
    447   }
    448 
    449   return value;
    450 }
    451 
    452 // Parse an encoded variable-length floating point value
    453 // (zero-extend to the right)
    454 template <class T>
    455 static T ParseFloatValue(const dex::u1** pptr, size_t size) {
    456   SLICER_CHECK(size > 0);
    457   SLICER_CHECK(size <= sizeof(T));
    458 
    459   T value = 0;
    460   int start_byte = sizeof(T) - size;
    461   for (dex::u1* p = reinterpret_cast<dex::u1*>(&value) + start_byte; size > 0;
    462        --size) {
    463     *p++ = *(*pptr)++;
    464   }
    465   return value;
    466 }
    467 
    468 ir::EncodedValue* Reader::ParseEncodedValue(const dex::u1** pptr) {
    469   auto ir_encoded_value = dex_ir_->Alloc<ir::EncodedValue>();
    470 
    471   SLICER_EXTRA(auto base_ptr = *pptr);
    472 
    473   dex::u1 header = *(*pptr)++;
    474   dex::u1 type = header & dex::kEncodedValueTypeMask;
    475   dex::u1 arg = header >> dex::kEncodedValueArgShift;
    476 
    477   ir_encoded_value->type = type;
    478 
    479   switch (type) {
    480     case dex::kEncodedByte:
    481       ir_encoded_value->u.byte_value = ParseIntValue<int8_t>(pptr, arg + 1);
    482       break;
    483 
    484     case dex::kEncodedShort:
    485       ir_encoded_value->u.short_value = ParseIntValue<int16_t>(pptr, arg + 1);
    486       break;
    487 
    488     case dex::kEncodedChar:
    489       ir_encoded_value->u.char_value = ParseIntValue<uint16_t>(pptr, arg + 1);
    490       break;
    491 
    492     case dex::kEncodedInt:
    493       ir_encoded_value->u.int_value = ParseIntValue<int32_t>(pptr, arg + 1);
    494       break;
    495 
    496     case dex::kEncodedLong:
    497       ir_encoded_value->u.long_value = ParseIntValue<int64_t>(pptr, arg + 1);
    498       break;
    499 
    500     case dex::kEncodedFloat:
    501       ir_encoded_value->u.float_value = ParseFloatValue<float>(pptr, arg + 1);
    502       break;
    503 
    504     case dex::kEncodedDouble:
    505       ir_encoded_value->u.double_value = ParseFloatValue<double>(pptr, arg + 1);
    506       break;
    507 
    508     case dex::kEncodedString: {
    509       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
    510       ir_encoded_value->u.string_value = GetString(index);
    511     } break;
    512 
    513     case dex::kEncodedType: {
    514       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
    515       ir_encoded_value->u.type_value = GetType(index);
    516     } break;
    517 
    518     case dex::kEncodedField: {
    519       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
    520       ir_encoded_value->u.field_value = GetFieldDecl(index);
    521     } break;
    522 
    523     case dex::kEncodedMethod: {
    524       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
    525       ir_encoded_value->u.method_value = GetMethodDecl(index);
    526     } break;
    527 
    528     case dex::kEncodedEnum: {
    529       dex::u4 index = ParseIntValue<dex::u4>(pptr, arg + 1);
    530       ir_encoded_value->u.enum_value = GetFieldDecl(index);
    531     } break;
    532 
    533     case dex::kEncodedArray:
    534       SLICER_CHECK(arg == 0);
    535       ir_encoded_value->u.array_value = ParseEncodedArray(pptr);
    536       break;
    537 
    538     case dex::kEncodedAnnotation:
    539       SLICER_CHECK(arg == 0);
    540       ir_encoded_value->u.annotation_value = ParseAnnotation(pptr);
    541       break;
    542 
    543     case dex::kEncodedNull:
    544       SLICER_CHECK(arg == 0);
    545       break;
    546 
    547     case dex::kEncodedBoolean:
    548       SLICER_CHECK(arg < 2);
    549       ir_encoded_value->u.bool_value = (arg == 1);
    550       break;
    551 
    552     default:
    553       SLICER_CHECK(!"unexpected value type");
    554   }
    555 
    556   SLICER_EXTRA(ir_encoded_value->original = slicer::MemView(base_ptr, *pptr - base_ptr));
    557 
    558   return ir_encoded_value;
    559 }
    560 
    561 ir::Annotation* Reader::ParseAnnotation(const dex::u1** pptr) {
    562   auto ir_annotation = dex_ir_->Alloc<ir::Annotation>();
    563 
    564   dex::u4 type_index = dex::ReadULeb128(pptr);
    565   dex::u4 elements_count = dex::ReadULeb128(pptr);
    566 
    567   ir_annotation->type = GetType(type_index);
    568   ir_annotation->visibility = dex::kVisibilityEncoded;
    569 
    570   for (dex::u4 i = 0; i < elements_count; ++i) {
    571     auto ir_element = dex_ir_->Alloc<ir::AnnotationElement>();
    572 
    573     ir_element->name = GetString(dex::ReadULeb128(pptr));
    574     ir_element->value = ParseEncodedValue(pptr);
    575 
    576     ir_annotation->elements.push_back(ir_element);
    577   }
    578 
    579   return ir_annotation;
    580 }
    581 
    582 ir::EncodedArray* Reader::ParseEncodedArray(const dex::u1** pptr) {
    583   auto ir_encoded_array = dex_ir_->Alloc<ir::EncodedArray>();
    584 
    585   dex::u4 count = dex::ReadULeb128(pptr);
    586   for (dex::u4 i = 0; i < count; ++i) {
    587     ir_encoded_array->values.push_back(ParseEncodedValue(pptr));
    588   }
    589 
    590   return ir_encoded_array;
    591 }
    592 
    593 ir::EncodedArray* Reader::ExtractEncodedArray(dex::u4 offset) {
    594   if (offset == 0) {
    595     return nullptr;
    596   }
    597 
    598   // first check if we already extracted the same "annotation_item"
    599   auto& ir_encoded_array = encoded_arrays_[offset];
    600   if (ir_encoded_array == nullptr) {
    601     auto ptr = dataPtr<dex::u1>(offset);
    602     ir_encoded_array = ParseEncodedArray(&ptr);
    603   }
    604   return ir_encoded_array;
    605 }
    606 
    607 ir::DebugInfo* Reader::ExtractDebugInfo(dex::u4 offset) {
    608   if (offset == 0) {
    609     return nullptr;
    610   }
    611 
    612   auto ir_debug_info = dex_ir_->Alloc<ir::DebugInfo>();
    613   const dex::u1* ptr = dataPtr<dex::u1>(offset);
    614 
    615   ir_debug_info->line_start = dex::ReadULeb128(&ptr);
    616 
    617   // TODO: implicit this param for non-static methods?
    618   dex::u4 param_count = dex::ReadULeb128(&ptr);
    619   for (dex::u4 i = 0; i < param_count; ++i) {
    620     dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
    621     auto ir_string =
    622         (name_index == dex::kNoIndex) ? nullptr : GetString(name_index);
    623     ir_debug_info->param_names.push_back(ir_string);
    624   }
    625 
    626   // parse the debug info opcodes and note the
    627   // references to strings and types (to make sure the IR
    628   // is the full closure of all referenced items)
    629   //
    630   // TODO: design a generic debug info iterator?
    631   //
    632   auto base_ptr = ptr;
    633   dex::u1 opcode = 0;
    634   while ((opcode = *ptr++) != dex::DBG_END_SEQUENCE) {
    635     switch (opcode) {
    636       case dex::DBG_ADVANCE_PC:
    637         // addr_diff
    638         dex::ReadULeb128(&ptr);
    639         break;
    640 
    641       case dex::DBG_ADVANCE_LINE:
    642         // line_diff
    643         dex::ReadSLeb128(&ptr);
    644         break;
    645 
    646       case dex::DBG_START_LOCAL: {
    647         // register_num
    648         dex::ReadULeb128(&ptr);
    649 
    650         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
    651         if (name_index != dex::kNoIndex) {
    652           GetString(name_index);
    653         }
    654 
    655         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
    656         if (type_index != dex::kNoIndex) {
    657           GetType(type_index);
    658         }
    659       } break;
    660 
    661       case dex::DBG_START_LOCAL_EXTENDED: {
    662         // register_num
    663         dex::ReadULeb128(&ptr);
    664 
    665         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
    666         if (name_index != dex::kNoIndex) {
    667           GetString(name_index);
    668         }
    669 
    670         dex::u4 type_index = dex::ReadULeb128(&ptr) - 1;
    671         if (type_index != dex::kNoIndex) {
    672           GetType(type_index);
    673         }
    674 
    675         dex::u4 sig_index = dex::ReadULeb128(&ptr) - 1;
    676         if (sig_index != dex::kNoIndex) {
    677           GetString(sig_index);
    678         }
    679       } break;
    680 
    681       case dex::DBG_END_LOCAL:
    682       case dex::DBG_RESTART_LOCAL:
    683         // register_num
    684         dex::ReadULeb128(&ptr);
    685         break;
    686 
    687       case dex::DBG_SET_FILE: {
    688         dex::u4 name_index = dex::ReadULeb128(&ptr) - 1;
    689         if (name_index != dex::kNoIndex) {
    690           GetString(name_index);
    691         }
    692       } break;
    693     }
    694   }
    695 
    696   ir_debug_info->data = slicer::MemView(base_ptr, ptr - base_ptr);
    697 
    698   return ir_debug_info;
    699 }
    700 
    701 ir::Code* Reader::ExtractCode(dex::u4 offset) {
    702   if (offset == 0) {
    703     return nullptr;
    704   }
    705 
    706   SLICER_CHECK(offset % 4 == 0);
    707 
    708   auto dex_code = dataPtr<dex::Code>(offset);
    709   auto ir_code = dex_ir_->Alloc<ir::Code>();
    710 
    711   ir_code->registers = dex_code->registers_size;
    712   ir_code->ins_count = dex_code->ins_size;
    713   ir_code->outs_count = dex_code->outs_size;
    714 
    715   // instructions array
    716   ir_code->instructions =
    717       slicer::ArrayView<const dex::u2>(dex_code->insns, dex_code->insns_size);
    718 
    719   // parse the instructions to discover references to other
    720   // IR nodes (see debug info stream parsing too)
    721   ParseInstructions(ir_code->instructions);
    722 
    723   // try blocks & handlers
    724   //
    725   // TODO: a generic try/catch blocks iterator?
    726   //
    727   if (dex_code->tries_size != 0) {
    728     dex::u4 aligned_count = (dex_code->insns_size + 1) / 2 * 2;
    729     auto tries =
    730         reinterpret_cast<const dex::TryBlock*>(dex_code->insns + aligned_count);
    731     auto handlers_list =
    732         reinterpret_cast<const dex::u1*>(tries + dex_code->tries_size);
    733 
    734     ir_code->try_blocks =
    735         slicer::ArrayView<const dex::TryBlock>(tries, dex_code->tries_size);
    736 
    737     // parse the handlers list (and discover embedded references)
    738     auto ptr = handlers_list;
    739 
    740     dex::u4 handlers_count = dex::ReadULeb128(&ptr);
    741     SLICER_WEAK_CHECK(handlers_count <= dex_code->tries_size);
    742 
    743     for (dex::u4 handler_index = 0; handler_index < handlers_count; ++handler_index) {
    744       int catch_count = dex::ReadSLeb128(&ptr);
    745 
    746       for (int catch_index = 0; catch_index < std::abs(catch_count); ++catch_index) {
    747         dex::u4 type_index = dex::ReadULeb128(&ptr);
    748         GetType(type_index);
    749 
    750         // address
    751         dex::ReadULeb128(&ptr);
    752       }
    753 
    754       if (catch_count < 1) {
    755         // catch_all_addr
    756         dex::ReadULeb128(&ptr);
    757       }
    758     }
    759 
    760     ir_code->catch_handlers = slicer::MemView(handlers_list, ptr - handlers_list);
    761   }
    762 
    763   ir_code->debug_info = ExtractDebugInfo(dex_code->debug_info_off);
    764 
    765   return ir_code;
    766 }
    767 
    768 ir::EncodedMethod* Reader::ParseEncodedMethod(const dex::u1** pptr, dex::u4* base_index) {
    769   auto ir_encoded_method = dex_ir_->Alloc<ir::EncodedMethod>();
    770 
    771   auto method_index = dex::ReadULeb128(pptr);
    772   SLICER_CHECK(method_index != dex::kNoIndex);
    773   if (*base_index != dex::kNoIndex) {
    774     SLICER_CHECK(method_index != 0);
    775     method_index += *base_index;
    776   }
    777   *base_index = method_index;
    778 
    779   ir_encoded_method->decl = GetMethodDecl(method_index);
    780   ir_encoded_method->access_flags = dex::ReadULeb128(pptr);
    781 
    782   dex::u4 code_offset = dex::ReadULeb128(pptr);
    783   ir_encoded_method->code = ExtractCode(code_offset);
    784 
    785   // update the methods lookup table
    786   dex_ir_->methods_lookup.Insert(ir_encoded_method);
    787 
    788   return ir_encoded_method;
    789 }
    790 
    791 ir::Type* Reader::ParseType(dex::u4 index) {
    792   auto& dex_type = TypeIds()[index];
    793   auto ir_type = dex_ir_->Alloc<ir::Type>();
    794 
    795   ir_type->descriptor = GetString(dex_type.descriptor_idx);
    796   ir_type->orig_index = index;
    797 
    798   return ir_type;
    799 }
    800 
    801 ir::FieldDecl* Reader::ParseFieldDecl(dex::u4 index) {
    802   auto& dex_field = FieldIds()[index];
    803   auto ir_field = dex_ir_->Alloc<ir::FieldDecl>();
    804 
    805   ir_field->name = GetString(dex_field.name_idx);
    806   ir_field->type = GetType(dex_field.type_idx);
    807   ir_field->parent = GetType(dex_field.class_idx);
    808   ir_field->orig_index = index;
    809 
    810   return ir_field;
    811 }
    812 
    813 ir::MethodDecl* Reader::ParseMethodDecl(dex::u4 index) {
    814   auto& dex_method = MethodIds()[index];
    815   auto ir_method = dex_ir_->Alloc<ir::MethodDecl>();
    816 
    817   ir_method->name = GetString(dex_method.name_idx);
    818   ir_method->prototype = GetProto(dex_method.proto_idx);
    819   ir_method->parent = GetType(dex_method.class_idx);
    820   ir_method->orig_index = index;
    821 
    822   return ir_method;
    823 }
    824 
    825 ir::TypeList* Reader::ExtractTypeList(dex::u4 offset) {
    826   if (offset == 0) {
    827     return nullptr;
    828   }
    829 
    830   // first check to see if we already extracted the same "type_list"
    831   auto& ir_type_list = type_lists_[offset];
    832   if (ir_type_list == nullptr) {
    833     ir_type_list = dex_ir_->Alloc<ir::TypeList>();
    834 
    835     auto dex_type_list = dataPtr<dex::TypeList>(offset);
    836     SLICER_WEAK_CHECK(dex_type_list->size > 0);
    837 
    838     for (dex::u4 i = 0; i < dex_type_list->size; ++i) {
    839       ir_type_list->types.push_back(GetType(dex_type_list->list[i].type_idx));
    840     }
    841   }
    842 
    843   return ir_type_list;
    844 }
    845 
    846 ir::Proto* Reader::ParseProto(dex::u4 index) {
    847   auto& dex_proto = ProtoIds()[index];
    848   auto ir_proto = dex_ir_->Alloc<ir::Proto>();
    849 
    850   ir_proto->shorty = GetString(dex_proto.shorty_idx);
    851   ir_proto->return_type = GetType(dex_proto.return_type_idx);
    852   ir_proto->param_types = ExtractTypeList(dex_proto.parameters_off);
    853   ir_proto->orig_index = index;
    854 
    855   // update the prototypes lookup table
    856   dex_ir_->prototypes_lookup.Insert(ir_proto);
    857 
    858   return ir_proto;
    859 }
    860 
    861 ir::String* Reader::ParseString(dex::u4 index) {
    862   auto ir_string = dex_ir_->Alloc<ir::String>();
    863 
    864   auto data = GetStringData(index);
    865   auto cstr = data;
    866   dex::ReadULeb128(&cstr);
    867   size_t size = (cstr - data) + ::strlen(reinterpret_cast<const char*>(cstr)) + 1;
    868 
    869   ir_string->data = slicer::MemView(data, size);
    870   ir_string->orig_index = index;
    871 
    872   // update the strings lookup table
    873   dex_ir_->strings_lookup.Insert(ir_string);
    874 
    875   return ir_string;
    876 }
    877 
    878 void Reader::ParseInstructions(slicer::ArrayView<const dex::u2> code) {
    879   const dex::u2* ptr = code.begin();
    880   while (ptr < code.end()) {
    881     auto dex_instr = dex::DecodeInstruction(ptr);
    882 
    883     dex::u4 index = dex::kNoIndex;
    884     switch (dex::GetFormatFromOpcode(dex_instr.opcode)) {
    885       case dex::kFmt20bc:
    886       case dex::kFmt21c:
    887       case dex::kFmt31c:
    888       case dex::kFmt35c:
    889       case dex::kFmt3rc:
    890         index = dex_instr.vB;
    891         break;
    892 
    893       case dex::kFmt22c:
    894         index = dex_instr.vC;
    895         break;
    896 
    897       default:
    898         break;
    899     }
    900 
    901     switch (GetIndexTypeFromOpcode(dex_instr.opcode)) {
    902       case dex::kIndexStringRef:
    903         GetString(index);
    904         break;
    905 
    906       case dex::kIndexTypeRef:
    907         GetType(index);
    908         break;
    909 
    910       case dex::kIndexFieldRef:
    911         GetFieldDecl(index);
    912         break;
    913 
    914       case dex::kIndexMethodRef:
    915         GetMethodDecl(index);
    916         break;
    917 
    918       default:
    919         break;
    920     }
    921 
    922     auto isize = dex::GetWidthFromBytecode(ptr);
    923     SLICER_CHECK(isize > 0);
    924     ptr += isize;
    925   }
    926   SLICER_CHECK(ptr == code.end());
    927 }
    928 
    929 // Basic .dex header structural checks
    930 void Reader::ValidateHeader() {
    931   SLICER_CHECK(size_ > sizeof(dex::Header));
    932 
    933   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
    934   // estimate. b/72402467
    935   SLICER_CHECK(header_->file_size <= size_);
    936   SLICER_CHECK(header_->header_size == sizeof(dex::Header));
    937   SLICER_CHECK(header_->endian_tag == dex::kEndianConstant);
    938   SLICER_CHECK(header_->data_size % 4 == 0);
    939 
    940   // Known issue: The fields might be slighly corrupted b/65452964
    941   // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
    942 
    943   SLICER_CHECK(header_->string_ids_off % 4 == 0);
    944   SLICER_CHECK(header_->type_ids_size < 65536);
    945   SLICER_CHECK(header_->type_ids_off % 4 == 0);
    946   SLICER_CHECK(header_->proto_ids_size < 65536);
    947   SLICER_CHECK(header_->proto_ids_off % 4 == 0);
    948   SLICER_CHECK(header_->field_ids_off % 4 == 0);
    949   SLICER_CHECK(header_->method_ids_off % 4 == 0);
    950   SLICER_CHECK(header_->class_defs_off % 4 == 0);
    951   SLICER_CHECK(header_->map_off >= header_->data_off && header_->map_off < size_);
    952   SLICER_CHECK(header_->link_size == 0);
    953   SLICER_CHECK(header_->link_off == 0);
    954   SLICER_CHECK(header_->data_off % 4 == 0);
    955   SLICER_CHECK(header_->map_off % 4 == 0);
    956 
    957   // we seem to have .dex files with extra bytes at the end ...
    958   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
    959   // estimate. b/72402467
    960   SLICER_WEAK_CHECK(header_->data_off + header_->data_size <= size_);
    961 
    962   // but we should still have the whole data section
    963 
    964   // Known issue: The fields might be slighly corrupted b/65452964
    965   // Known issue: For performance reasons the initial size_ passed to jvmti events might be an
    966   // estimate. b/72402467
    967   // SLICER_CHECK(header_->data_off + header_->data_size <= size_);
    968 
    969   // validate the map
    970   // (map section size = sizeof(MapList::size) + sizeof(MapList::list[size])
    971   auto map_list = ptr<dex::MapList>(header_->map_off);
    972   SLICER_CHECK(map_list->size > 0);
    973   auto map_section_size =
    974       sizeof(dex::u4) + sizeof(dex::MapItem) * map_list->size;
    975   SLICER_CHECK(header_->map_off + map_section_size <= size_);
    976 }
    977 
    978 }  // namespace dex
    979