Home | History | Annotate | Download | only in slicer
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #pragma once
     18 
     19 #include "common.h"
     20 #include "memview.h"
     21 #include "arrayview.h"
     22 #include "dex_format.h"
     23 #include "dex_leb128.h"
     24 #include "buffer.h"
     25 #include "index_map.h"
     26 #include "hash_table.h"
     27 
     28 #include <stdlib.h>
     29 #include <map>
     30 #include <memory>
     31 #include <vector>
     32 #include <string>
     33 
     34 // A simple, lightweight IR to abstract the key .dex structures
     35 //
     36 // 1. All the cross-IR references are modeled as plain pointers.
     37 // 2. Newly allocated nodes are mem-zeroed first
     38 //
     39 // This IR can mirror any .dex file, although for JVMTI BCI
     40 // it's expected to construct the IR for the single modified class only
     41 // (and include only the nodes referenced from that class)
     42 
     43 #define SLICER_IR_TYPE     \
     44   using Node::Node; \
     45   friend struct DexFile;
     46 
     47 #define SLICER_IR_INDEXED_TYPE           \
     48   using IndexedNode::IndexedNode; \
     49   friend struct DexFile;
     50 
     51 namespace ir {
     52 
     53 // convenience notation
     54 template <class T>
     55 using own = std::unique_ptr<T>;
     56 
     57 struct Node;
     58 struct IndexedNode;
     59 struct EncodedValue;
     60 struct EncodedArray;
     61 struct String;
     62 struct Type;
     63 struct TypeList;
     64 struct Proto;
     65 struct FieldDecl;
     66 struct EncodedField;
     67 struct DebugInfo;
     68 struct Code;
     69 struct MethodDecl;
     70 struct EncodedMethod;
     71 struct AnnotationElement;
     72 struct Annotation;
     73 struct AnnotationSet;
     74 struct AnnotationSetRefList;
     75 struct FieldAnnotation;
     76 struct MethodAnnotation;
     77 struct ParamAnnotation;
     78 struct AnnotationsDirectory;
     79 struct Class;
     80 struct DexFile;
     81 
     82 // The base class for all the .dex IR types:
     83 //   This is not a polymorphic interface, but
     84 //   a way to constrain the allocation and ownership
     85 //   of .dex IR nodes.
     86 struct Node {
     87   void* operator new(size_t size) {
     88     return ::calloc(1, size);
     89   }
     90 
     91   void* operator new[](size_t size) {
     92     return ::calloc(1, size);
     93   }
     94 
     95   void operator delete(void* ptr) {
     96     ::free(ptr);
     97   }
     98 
     99   void operator delete[](void* ptr) {
    100     ::free(ptr);
    101   }
    102 
    103  public:
    104   Node(const Node&) = delete;
    105   Node& operator=(const Node&) = delete;
    106 
    107  protected:
    108   Node() = default;
    109   ~Node() = default;
    110 };
    111 
    112 // a concession for the convenience of the .dex writer
    113 //
    114 // TODO: consider moving the indexing to the writer.
    115 //
    116 struct IndexedNode : public Node {
    117   SLICER_IR_TYPE;
    118 
    119   // this is the index in the generated image
    120   // (not the original index)
    121   dex::u4 index;
    122 
    123   // original indexe
    124   // (from the source .dex image or allocated post reader)
    125   dex::u4 orig_index;
    126 };
    127 
    128 struct EncodedValue : public Node {
    129   SLICER_IR_TYPE;
    130 
    131   dex::u1 type;
    132   union {
    133     int8_t byte_value;
    134     int16_t short_value;
    135     uint16_t char_value;
    136     int32_t int_value;
    137     int64_t long_value;
    138     float float_value;
    139     double double_value;
    140     String* string_value;
    141     Type* type_value;
    142     FieldDecl* field_value;
    143     MethodDecl* method_value;
    144     FieldDecl* enum_value;
    145     EncodedArray* array_value;
    146     Annotation* annotation_value;
    147     bool bool_value;
    148   } u;
    149 
    150   SLICER_EXTRA(slicer::MemView original);
    151 };
    152 
    153 struct EncodedArray : public Node {
    154   SLICER_IR_TYPE;
    155 
    156   std::vector<EncodedValue*> values;
    157 };
    158 
    159 struct String : public IndexedNode {
    160   SLICER_IR_INDEXED_TYPE;
    161 
    162   // opaque DEX "string_data_item"
    163   slicer::MemView data;
    164 
    165   const char* c_str() const {
    166     const dex::u1* strData = data.ptr<dex::u1>();
    167     dex::ReadULeb128(&strData);
    168     return reinterpret_cast<const char*>(strData);
    169   }
    170 };
    171 
    172 struct Type : public IndexedNode {
    173   SLICER_IR_INDEXED_TYPE;
    174 
    175   enum class Category { Void, Scalar, WideScalar, Reference };
    176 
    177   String* descriptor;
    178   Class* class_def;
    179 
    180   std::string Decl() const;
    181   Category GetCategory() const;
    182 };
    183 
    184 struct TypeList : public Node {
    185   SLICER_IR_TYPE;
    186 
    187   std::vector<Type*> types;
    188 };
    189 
    190 struct Proto : public IndexedNode {
    191   SLICER_IR_INDEXED_TYPE;
    192 
    193   String* shorty;
    194   Type* return_type;
    195   TypeList* param_types;
    196 
    197   std::string Signature() const;
    198 };
    199 
    200 struct FieldDecl : public IndexedNode {
    201   SLICER_IR_INDEXED_TYPE;
    202 
    203   String* name;
    204   Type* type;
    205   Type* parent;
    206 };
    207 
    208 struct EncodedField : public Node {
    209   SLICER_IR_TYPE;
    210 
    211   FieldDecl* decl;
    212   dex::u4 access_flags;
    213 };
    214 
    215 struct DebugInfo : public Node {
    216   SLICER_IR_TYPE;
    217 
    218   dex::u4 line_start;
    219   std::vector<String*> param_names;
    220 
    221   // original debug info opcodes stream
    222   // (must be "relocated" when creating a new .dex image)
    223   slicer::MemView data;
    224 };
    225 
    226 struct Code : public Node {
    227   SLICER_IR_TYPE;
    228 
    229   dex::u2 registers;
    230   dex::u2 ins_count;
    231   dex::u2 outs_count;
    232   slicer::ArrayView<const dex::u2> instructions;
    233   slicer::ArrayView<const dex::TryBlock> try_blocks;
    234   slicer::MemView catch_handlers;
    235   DebugInfo* debug_info;
    236 };
    237 
    238 struct MethodDecl : public IndexedNode {
    239   SLICER_IR_INDEXED_TYPE;
    240 
    241   String* name;
    242   Proto* prototype;
    243   Type* parent;
    244 };
    245 
    246 struct EncodedMethod : public Node {
    247   SLICER_IR_TYPE;
    248 
    249   MethodDecl* decl;
    250   Code* code;
    251   dex::u4 access_flags;
    252 };
    253 
    254 struct AnnotationElement : public Node {
    255   SLICER_IR_TYPE;
    256 
    257   String* name;
    258   EncodedValue* value;
    259 };
    260 
    261 struct Annotation : public Node {
    262   SLICER_IR_TYPE;
    263 
    264   Type* type;
    265   std::vector<AnnotationElement*> elements;
    266   dex::u1 visibility;
    267 };
    268 
    269 struct AnnotationSet : public Node {
    270   SLICER_IR_TYPE;
    271 
    272   std::vector<Annotation*> annotations;
    273 };
    274 
    275 struct AnnotationSetRefList : public Node {
    276   SLICER_IR_TYPE;
    277 
    278   std::vector<AnnotationSet*> annotations;
    279 };
    280 
    281 struct FieldAnnotation : public Node {
    282   SLICER_IR_TYPE;
    283 
    284   FieldDecl* field_decl;
    285   AnnotationSet* annotations;
    286 };
    287 
    288 struct MethodAnnotation : public Node {
    289   SLICER_IR_TYPE;
    290 
    291   MethodDecl* method_decl;
    292   AnnotationSet* annotations;
    293 };
    294 
    295 struct ParamAnnotation : public Node {
    296   SLICER_IR_TYPE;
    297 
    298   MethodDecl* method_decl;
    299   AnnotationSetRefList* annotations;
    300 };
    301 
    302 struct AnnotationsDirectory : public Node {
    303   SLICER_IR_TYPE;
    304 
    305   AnnotationSet* class_annotation;
    306   std::vector<FieldAnnotation*> field_annotations;
    307   std::vector<MethodAnnotation*> method_annotations;
    308   std::vector<ParamAnnotation*> param_annotations;
    309 };
    310 
    311 struct Class : public IndexedNode {
    312   SLICER_IR_INDEXED_TYPE;
    313 
    314   Type* type;
    315   dex::u4 access_flags;
    316   Type* super_class;
    317   TypeList* interfaces;
    318   String* source_file;
    319   AnnotationsDirectory* annotations;
    320   EncodedArray* static_init;
    321 
    322   std::vector<EncodedField*> static_fields;
    323   std::vector<EncodedField*> instance_fields;
    324   std::vector<EncodedMethod*> direct_methods;
    325   std::vector<EncodedMethod*> virtual_methods;
    326 };
    327 
    328 // ir::String hashing
    329 struct StringsHasher {
    330   const char* GetKey(const String* string) const { return string->c_str(); }
    331   uint32_t Hash(const char* string_key) const;
    332   bool Compare(const char* string_key, const String* string) const;
    333 };
    334 
    335 // ir::Proto hashing
    336 struct ProtosHasher {
    337   std::string GetKey(const Proto* proto) const { return proto->Signature(); }
    338   uint32_t Hash(const std::string& proto_key) const;
    339   bool Compare(const std::string& proto_key, const Proto* proto) const;
    340 };
    341 
    342 // ir::EncodedMethod hashing
    343 struct MethodKey {
    344   String* class_descriptor = nullptr;
    345   String* method_name = nullptr;
    346   Proto* prototype = nullptr;
    347 };
    348 
    349 struct MethodsHasher {
    350   MethodKey GetKey(const EncodedMethod* method) const;
    351   uint32_t Hash(const MethodKey& method_key) const;
    352   bool Compare(const MethodKey& method_key, const EncodedMethod* method) const;
    353 };
    354 
    355 using StringsLookup = slicer::HashTable<const char*, String, StringsHasher>;
    356 using PrototypesLookup = slicer::HashTable<const std::string&, Proto, ProtosHasher>;
    357 using MethodsLookup = slicer::HashTable<const MethodKey&, EncodedMethod, MethodsHasher>;
    358 
    359 // The main container/root for a .dex IR
    360 struct DexFile {
    361   // indexed structures
    362   std::vector<own<String>> strings;
    363   std::vector<own<Type>> types;
    364   std::vector<own<Proto>> protos;
    365   std::vector<own<FieldDecl>> fields;
    366   std::vector<own<MethodDecl>> methods;
    367   std::vector<own<Class>> classes;
    368 
    369   // data segment structures
    370   std::vector<own<EncodedField>> encoded_fields;
    371   std::vector<own<EncodedMethod>> encoded_methods;
    372   std::vector<own<TypeList>> type_lists;
    373   std::vector<own<Code>> code;
    374   std::vector<own<DebugInfo>> debug_info;
    375   std::vector<own<EncodedValue>> encoded_values;
    376   std::vector<own<EncodedArray>> encoded_arrays;
    377   std::vector<own<Annotation>> annotations;
    378   std::vector<own<AnnotationElement>> annotation_elements;
    379   std::vector<own<AnnotationSet>> annotation_sets;
    380   std::vector<own<AnnotationSetRefList>> annotation_set_ref_lists;
    381   std::vector<own<AnnotationsDirectory>> annotations_directories;
    382   std::vector<own<FieldAnnotation>> field_annotations;
    383   std::vector<own<MethodAnnotation>> method_annotations;
    384   std::vector<own<ParamAnnotation>> param_annotations;
    385 
    386   // original index to IR node mappings
    387   //
    388   // CONSIDER: we only need to carry around
    389   //   the relocation for the referenced items
    390   //
    391   std::map<dex::u4, Type*> types_map;
    392   std::map<dex::u4, String*> strings_map;
    393   std::map<dex::u4, Proto*> protos_map;
    394   std::map<dex::u4, FieldDecl*> fields_map;
    395   std::map<dex::u4, MethodDecl*> methods_map;
    396   std::map<dex::u4, Class*> classes_map;
    397 
    398   // original .dex header "magic" signature
    399   slicer::MemView magic;
    400 
    401   // keep track of the used index values
    402   // (so we can easily allocate new ones)
    403   IndexMap strings_indexes;
    404   IndexMap types_indexes;
    405   IndexMap protos_indexes;
    406   IndexMap fields_indexes;
    407   IndexMap methods_indexes;
    408   IndexMap classes_indexes;
    409 
    410   // lookup hash tables
    411   StringsLookup strings_lookup;
    412   MethodsLookup methods_lookup;
    413   PrototypesLookup prototypes_lookup;
    414 
    415  public:
    416   DexFile() = default;
    417 
    418   // No copy/move semantics
    419   DexFile(const DexFile&) = delete;
    420   DexFile& operator=(const DexFile&) = delete;
    421 
    422   template <class T>
    423   T* Alloc() {
    424     T* p = new T();
    425     Track(p);
    426     return p;
    427   }
    428 
    429   void AttachBuffer(slicer::Buffer&& buffer) {
    430     buffers_.push_back(std::move(buffer));
    431   }
    432 
    433   void Normalize();
    434 
    435  private:
    436   void TopSortClassIndex(Class* irClass, dex::u4* nextIndex);
    437   void SortClassIndexes();
    438 
    439   template <class T>
    440   void PushOwn(std::vector<own<T>>& v, T* p) {
    441     v.push_back(own<T>(p));
    442   }
    443 
    444   void Track(String* p) { PushOwn(strings, p); }
    445   void Track(Type* p) { PushOwn(types, p); }
    446   void Track(Proto* p) { PushOwn(protos, p); }
    447   void Track(FieldDecl* p) { PushOwn(fields, p); }
    448   void Track(MethodDecl* p) { PushOwn(methods, p); }
    449   void Track(Class* p) { PushOwn(classes, p); }
    450 
    451   void Track(EncodedField* p) { PushOwn(encoded_fields, p); }
    452   void Track(EncodedMethod* p) { PushOwn(encoded_methods, p); }
    453   void Track(TypeList* p) { PushOwn(type_lists, p); }
    454   void Track(Code* p) { PushOwn(code, p); }
    455   void Track(DebugInfo* p) { PushOwn(debug_info, p); }
    456   void Track(EncodedValue* p) { PushOwn(encoded_values, p); }
    457   void Track(EncodedArray* p) { PushOwn(encoded_arrays, p); }
    458   void Track(Annotation* p) { PushOwn(annotations, p); }
    459   void Track(AnnotationElement* p) { PushOwn(annotation_elements, p); }
    460   void Track(AnnotationSet* p) { PushOwn(annotation_sets, p); }
    461   void Track(AnnotationSetRefList* p) { PushOwn(annotation_set_ref_lists, p); }
    462   void Track(AnnotationsDirectory* p) { PushOwn(annotations_directories, p); }
    463   void Track(FieldAnnotation* p) { PushOwn(field_annotations, p); }
    464   void Track(MethodAnnotation* p) { PushOwn(method_annotations, p); }
    465   void Track(ParamAnnotation* p) { PushOwn(param_annotations, p); }
    466 
    467 private:
    468   // additional memory buffers owned by this .dex IR
    469   std::vector<slicer::Buffer> buffers_;
    470 };
    471 
    472 }  // namespace ir
    473 
    474 #undef SLICER_IR_TYPE
    475 #undef SLICER_IR_INDEXED_TYPE
    476