Home | History | Annotate | Download | only in slicer
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "slicer/dex_ir.h"
     18 #include "slicer/chronometer.h"
     19 #include "slicer/dex_utf8.h"
     20 #include "slicer/dex_format.h"
     21 
     22 #include <algorithm>
     23 #include <cstdint>
     24 #include <map>
     25 #include <memory>
     26 #include <vector>
     27 #include <sstream>
     28 #include <functional>
     29 
     30 namespace ir {
     31 
     32 // DBJ2a string hash
     33 static uint32_t HashString(const char* cstr) {
     34   uint32_t hash = 5381;  // DBJ2 magic prime value
     35   while (*cstr) {
     36     hash = ((hash << 5) + hash) ^ *cstr++;
     37   }
     38   return hash;
     39 }
     40 
     41 uint32_t StringsHasher::Hash(const char* string_key) const {
     42   return HashString(string_key);
     43 }
     44 
     45 bool StringsHasher::Compare(const char* string_key, const String* string) const {
     46   return dex::Utf8Cmp(string_key, string->c_str()) == 0;
     47 }
     48 
     49 uint32_t ProtosHasher::Hash(const std::string& proto_key) const {
     50   return HashString(proto_key.c_str());
     51 }
     52 
     53 bool ProtosHasher::Compare(const std::string& proto_key, const Proto* proto) const {
     54   return proto_key == proto->Signature();
     55 }
     56 
     57 MethodKey MethodsHasher::GetKey(const EncodedMethod* method) const {
     58   MethodKey method_key;
     59   method_key.class_descriptor = method->decl->parent->descriptor;
     60   method_key.method_name = method->decl->name;
     61   method_key.prototype = method->decl->prototype;
     62   return method_key;
     63 }
     64 
     65 uint32_t MethodsHasher::Hash(const MethodKey& method_key) const {
     66   return static_cast<uint32_t>(std::hash<void*>{}(method_key.class_descriptor) ^
     67                                std::hash<void*>{}(method_key.method_name) ^
     68                                std::hash<void*>{}(method_key.prototype));
     69 }
     70 
     71 bool MethodsHasher::Compare(const MethodKey& method_key, const EncodedMethod* method) const {
     72   return method_key.class_descriptor == method->decl->parent->descriptor &&
     73          method_key.method_name == method->decl->name &&
     74          method_key.prototype == method->decl->prototype;
     75 }
     76 
     77 // Human-readable type declaration
     78 std::string Type::Decl() const {
     79   return dex::DescriptorToDecl(descriptor->c_str());
     80 }
     81 
     82 Type::Category Type::GetCategory() const {
     83   switch (*descriptor->c_str()) {
     84     case 'L':
     85     case '[':
     86       return Category::Reference;
     87     case 'V':
     88       return Category::Void;
     89     case 'D':
     90     case 'J':
     91       return Category::WideScalar;
     92     default:
     93       return Category::Scalar;
     94   }
     95 }
     96 
     97 // Create the corresponding JNI signature:
     98 //  https://docs.oracle.com/javase/8/docs/technotes/guides/jni/spec/types.html#type_signatures
     99 std::string Proto::Signature() const {
    100   std::stringstream ss;
    101   ss << "(";
    102   if (param_types != nullptr) {
    103     for (const auto& type : param_types->types) {
    104       ss << type->descriptor->c_str();
    105     }
    106   }
    107   ss << ")";
    108   ss << return_type->descriptor->c_str();
    109   return ss.str();
    110 }
    111 
    112 // Helper for IR normalization
    113 // (it sorts items and update the numeric idexes to match)
    114 template <class T, class C>
    115 static void IndexItems(std::vector<T>& items, C comp) {
    116   std::sort(items.begin(), items.end(), comp);
    117   for (size_t i = 0; i < items.size(); ++i) {
    118     items[i]->index = i;
    119   }
    120 }
    121 
    122 // Helper for IR normalization (DFS for topological sort)
    123 //
    124 // NOTE: this recursive version is clean and simple and we know
    125 //  that the max depth is bounded (exactly 1 for JVMTI and a small
    126 //  max for general case - the largest .dex file in AOSP has 5000 classes
    127 //  total)
    128 //
    129 void DexFile::TopSortClassIndex(Class* irClass, dex::u4* nextIndex) {
    130   if (irClass->index == dex::u4(-1)) {
    131     if (irClass->super_class && irClass->super_class->class_def) {
    132       TopSortClassIndex(irClass->super_class->class_def, nextIndex);
    133     }
    134 
    135     if (irClass->interfaces) {
    136       for (Type* interfaceType : irClass->interfaces->types) {
    137         if (interfaceType->class_def) {
    138           TopSortClassIndex(interfaceType->class_def, nextIndex);
    139         }
    140       }
    141     }
    142 
    143     SLICER_CHECK(*nextIndex < classes.size());
    144     irClass->index = (*nextIndex)++;
    145   }
    146 }
    147 
    148 // Helper for IR normalization
    149 // (topological sort the classes)
    150 void DexFile::SortClassIndexes() {
    151   for (auto& irClass : classes) {
    152     irClass->index = dex::u4(-1);
    153   }
    154 
    155   dex::u4 nextIndex = 0;
    156   for (auto& irClass : classes) {
    157     TopSortClassIndex(irClass.get(), &nextIndex);
    158   }
    159 }
    160 
    161 // Helper for NormalizeClass()
    162 static void SortEncodedFields(std::vector<EncodedField*>* fields) {
    163   std::sort(fields->begin(), fields->end(),
    164             [](const EncodedField* a, const EncodedField* b) {
    165               SLICER_CHECK(a->decl->index != b->decl->index || a == b);
    166               return a->decl->index < b->decl->index;
    167             });
    168 }
    169 
    170 // Helper for NormalizeClass()
    171 static void SortEncodedMethods(std::vector<EncodedMethod*>* methods) {
    172   std::sort(methods->begin(), methods->end(),
    173             [](const EncodedMethod* a, const EncodedMethod* b) {
    174               SLICER_CHECK(a->decl->index != b->decl->index || a == b);
    175               return a->decl->index < b->decl->index;
    176             });
    177 }
    178 
    179 // Helper for IR normalization
    180 // (sort the field & method arrays)
    181 static void NormalizeClass(Class* irClass) {
    182   SortEncodedFields(&irClass->static_fields);
    183   SortEncodedFields(&irClass->instance_fields);
    184   SortEncodedMethods(&irClass->direct_methods);
    185   SortEncodedMethods(&irClass->virtual_methods);
    186 }
    187 
    188 // Prepare the IR for generating a .dex image
    189 // (the .dex format requires a specific sort order for some of the arrays, etc...)
    190 //
    191 // TODO: not a great solution - move this logic to the writer!
    192 //
    193 // TODO: the comparison predicate can be better expressed by using std::tie()
    194 //  Ex. FieldDecl has a method comp() returning tie(parent->index, name->index, type->index)
    195 //
    196 void DexFile::Normalize() {
    197   // sort build the .dex indexes
    198   IndexItems(strings, [](const own<String>& a, const own<String>& b) {
    199     // this list must be sorted by std::string contents, using UTF-16 code point values
    200     // (not in a locale-sensitive manner)
    201     return dex::Utf8Cmp(a->c_str(), b->c_str()) < 0;
    202   });
    203 
    204   IndexItems(types, [](const own<Type>& a, const own<Type>& b) {
    205     // this list must be sorted by string_id index
    206     return a->descriptor->index < b->descriptor->index;
    207   });
    208 
    209   IndexItems(protos, [](const own<Proto>& a, const own<Proto>& b) {
    210     // this list must be sorted in return-type (by type_id index) major order,
    211     // and then by argument list (lexicographic ordering, individual arguments
    212     // ordered by type_id index)
    213     if (a->return_type->index != b->return_type->index) {
    214       return a->return_type->index < b->return_type->index;
    215     } else {
    216       std::vector<Type*> empty;
    217       const auto& aParamTypes = a->param_types ? a->param_types->types : empty;
    218       const auto& bParamTypes = b->param_types ? b->param_types->types : empty;
    219       return std::lexicographical_compare(
    220           aParamTypes.begin(), aParamTypes.end(), bParamTypes.begin(),
    221           bParamTypes.end(),
    222           [](const Type* t1, const Type* t2) { return t1->index < t2->index; });
    223     }
    224   });
    225 
    226   IndexItems(fields, [](const own<FieldDecl>& a, const own<FieldDecl>& b) {
    227     // this list must be sorted, where the defining type (by type_id index) is
    228     // the major order, field name (by string_id index) is the intermediate
    229     // order, and type (by type_id index) is the minor order
    230     return (a->parent->index != b->parent->index)
    231                ? a->parent->index < b->parent->index
    232                : (a->name->index != b->name->index)
    233                      ? a->name->index < b->name->index
    234                      : a->type->index < b->type->index;
    235   });
    236 
    237   IndexItems(methods, [](const own<MethodDecl>& a, const own<MethodDecl>& b) {
    238     // this list must be sorted, where the defining type (by type_id index) is
    239     // the major order, method name (by string_id index) is the intermediate
    240     // order, and method prototype (by proto_id index) is the minor order
    241     return (a->parent->index != b->parent->index)
    242                ? a->parent->index < b->parent->index
    243                : (a->name->index != b->name->index)
    244                      ? a->name->index < b->name->index
    245                      : a->prototype->index < b->prototype->index;
    246   });
    247 
    248   // reverse topological sort
    249   //
    250   // the classes must be ordered such that a given class's superclass and
    251   // implemented interfaces appear in the list earlier than the referring
    252   // class
    253   //
    254   // CONSIDER: for the BCI-only scenario we can avoid this
    255   //
    256   SortClassIndexes();
    257 
    258   IndexItems(classes, [&](const own<Class>& a, const own<Class>& b) {
    259     SLICER_CHECK(a->index < classes.size());
    260     SLICER_CHECK(b->index < classes.size());
    261     SLICER_CHECK(a->index != b->index || a == b);
    262     return a->index < b->index;
    263   });
    264 
    265   // normalize class data
    266   for (const auto& irClass : classes) {
    267     NormalizeClass(irClass.get());
    268   }
    269 
    270   // normalize annotations
    271   for (const auto& irAnnotation : annotations) {
    272     // elements must be sorted in increasing order by string_id index
    273     auto& elements = irAnnotation->elements;
    274     std::sort(elements.begin(), elements.end(),
    275               [](const AnnotationElement* a, const AnnotationElement* b) {
    276                 return a->name->index < b->name->index;
    277               });
    278   }
    279 
    280   // normalize "annotation_set_item"
    281   for (const auto& irAnnotationSet : annotation_sets) {
    282     // The elements must be sorted in increasing order, by type_idx
    283     auto& annotations = irAnnotationSet->annotations;
    284     std::sort(annotations.begin(), annotations.end(),
    285               [](const Annotation* a, const Annotation* b) {
    286                 return a->type->index < b->type->index;
    287               });
    288   }
    289 
    290   // normalize "annotations_directory_item"
    291   for (const auto& irAnnotationDirectory : annotations_directories) {
    292     // field_annotations: The elements of the list must be
    293     // sorted in increasing order, by field_idx
    294     auto& field_annotations = irAnnotationDirectory->field_annotations;
    295     std::sort(field_annotations.begin(), field_annotations.end(),
    296               [](const FieldAnnotation* a, const FieldAnnotation* b) {
    297                 return a->field_decl->index < b->field_decl->index;
    298               });
    299 
    300     // method_annotations: The elements of the list must be
    301     // sorted in increasing order, by method_idx
    302     auto& method_annotations = irAnnotationDirectory->method_annotations;
    303     std::sort(method_annotations.begin(), method_annotations.end(),
    304               [](const MethodAnnotation* a, const MethodAnnotation* b) {
    305                 return a->method_decl->index < b->method_decl->index;
    306               });
    307 
    308     // parameter_annotations: The elements of the list must be
    309     // sorted in increasing order, by method_idx
    310     auto& param_annotations = irAnnotationDirectory->param_annotations;
    311     std::sort(param_annotations.begin(), param_annotations.end(),
    312               [](const ParamAnnotation* a, const ParamAnnotation* b) {
    313                 return a->method_decl->index < b->method_decl->index;
    314               });
    315   }
    316 }
    317 
    318 } // namespace ir
    319 
    320