Home | History | Annotate | Download | only in analysis
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Dalvik bytecode verifier.
     19  */
     20 #ifndef _DALVIK_CODEVERIFY
     21 #define _DALVIK_CODEVERIFY
     22 
     23 #include "analysis/VerifySubs.h"
     24 
     25 
     26 /*
     27  * Enumeration for register type values.  The "hi" piece of a 64-bit value
     28  * MUST immediately follow the "lo" piece in the enumeration, so we can check
     29  * that hi==lo+1.
     30  *
     31  * Assignment of constants:
     32  *   [-MAXINT,-32768)   : integer
     33  *   [-32768,-128)      : short
     34  *   [-128,0)           : byte
     35  *   0                  : zero
     36  *   1                  : one
     37  *   [2,128)            : posbyte
     38  *   [128,32768)        : posshort
     39  *   [32768,65536)      : char
     40  *   [65536,MAXINT]     : integer
     41  *
     42  * Allowed "implicit" widening conversions:
     43  *   zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
     44  *   one -> boolean, posbyte, byte, posshort, short, char, integer
     45  *   boolean -> posbyte, byte, posshort, short, char, integer
     46  *   posbyte -> posshort, short, integer, char
     47  *   byte -> short, integer
     48  *   posshort -> integer, char
     49  *   short -> integer
     50  *   char -> integer
     51  *
     52  * In addition, all of the above can convert to "float".
     53  *
     54  * We're more careful with integer values than the spec requires.  The
     55  * motivation is to restrict byte/char/short to the correct range of values.
     56  * For example, if a method takes a byte argument, we don't want to allow
     57  * the code to load the constant "1024" and pass it in.
     58  */
     59 enum {
     60     kRegTypeUnknown = 0,    /* initial state; use value=0 so calloc works */
     61     kRegTypeUninit = 1,     /* MUST be odd to distinguish from pointer */
     62     kRegTypeConflict,       /* merge clash makes this reg's type unknowable */
     63 
     64     /*
     65      * Category-1nr types.  The order of these is chiseled into a couple
     66      * of tables, so don't add, remove, or reorder if you can avoid it.
     67      */
     68 #define kRegType1nrSTART    kRegTypeFloat
     69     kRegTypeFloat,
     70     kRegTypeZero,           /* 32-bit 0, could be Boolean, Int, Float, or Ref */
     71     kRegTypeOne,            /* 32-bit 1, could be Boolean, Int, Float */
     72     kRegTypeBoolean,        /* must be 0 or 1 */
     73     kRegTypePosByte,        /* byte, known positive (can become char) */
     74     kRegTypeByte,
     75     kRegTypePosShort,       /* short, known positive (can become char) */
     76     kRegTypeShort,
     77     kRegTypeChar,
     78     kRegTypeInteger,
     79 #define kRegType1nrEND      kRegTypeInteger
     80 
     81     kRegTypeLongLo,         /* lower-numbered register; endian-independent */
     82     kRegTypeLongHi,
     83     kRegTypeDoubleLo,
     84     kRegTypeDoubleHi,
     85 
     86     /*
     87      * Enumeration max; this is used with "full" (32-bit) RegType values.
     88      *
     89      * Anything larger than this is a ClassObject or uninit ref.  Mask off
     90      * all but the low 8 bits; if you're left with kRegTypeUninit, pull
     91      * the uninit index out of the high 24.  Because kRegTypeUninit has an
     92      * odd value, there is no risk of a particular ClassObject pointer bit
     93      * pattern being confused for it (assuming our class object allocator
     94      * uses word alignment).
     95      */
     96     kRegTypeMAX
     97 };
     98 #define kRegTypeUninitMask  0xff
     99 #define kRegTypeUninitShift 8
    100 
/*
 * RegType holds information about the type of data held in a register.
 * For most types it's a simple enum.  For reference types it holds a
 * pointer to the ClassObject, and for uninitialized references it holds
 * an index into the UninitInstanceMap.
 *
 * Decoding, as described alongside kRegTypeMAX: values up to kRegTypeMAX
 * are plain enum values.  For anything larger, the low 8 bits
 * (kRegTypeUninitMask) equal to kRegTypeUninit mark an uninitialized
 * reference whose map index sits in the high 24 bits
 * (kRegTypeUninitShift); otherwise the value is a ClassObject pointer.
 */
typedef u4 RegType;
    108 
    109 /*
    110  * Table that maps uninitialized instances to classes, based on the
    111  * address of the new-instance instruction.
    112  */
typedef struct UninitInstanceMap {
    int numEntries;     /* entry count for map[] (presumably; confirm in
                           dvmCreateUninitInstanceMap) */
    struct {
        int             addr;   /* code offset, or -1 for method arg ("this") */
        ClassObject*    clazz;  /* class created at this address */
    } map[1];           /* declared with a single element; NOTE(review): looks
                           like the usual over-allocated trailing array --
                           confirm against the allocation code */
} UninitInstanceMap;
/* "addr" value that stands for the method's "this" argument. */
#define kUninitThisArgAddr  (-1)
/* NOTE(review): presumably the map[] slot used for "this" -- confirm. */
#define kUninitThisArgSlot  0
    122 
    123 /*
    124  * Various bits of data generated by the verifier, wrapped up in a package
    125  * for ease of use by the register map generator.
    126  */
typedef struct VerifierData {
    /*
     * The method we're working on.
     */
    const Method*   method;

    /*
     * Number of code units of instructions in the method.  A cache of the
     * value calculated by dvmGetMethodInsnsSize().
     */
    u4              insnsSize;

    /*
     * Number of registers we track for each instruction.  This is equal
     * to the method's declared "registersSize".  (Does not include the
     * pending return value.)
     */
    u4              insnRegCount;

    /*
     * Instruction widths and flags, one entry per code unit.  Read and
     * updated through the dvmInsn* inline helpers in this header.
     */
    InsnFlags*      insnFlags;

    /*
     * Uninitialized instance map, used for tracking the movement of
     * objects that have been allocated but not initialized.
     */
    UninitInstanceMap* uninitMap;

    /*
     * Array of RegType arrays, one entry per code unit.  We only need
     * entries for code units that hold the start of an "interesting"
     * instruction.  For register map generation, we're only interested
     * in GC points.
     *
     * NOTE(review): this comment used to say "SRegType", but the field is
     * declared as RegType** -- confirm which is intended.
     */
    RegType**       addrRegs;
} VerifierData;
    165 
    166 
/*
 * Table with static merge logic for primitive types.  Both dimensions are
 * kRegTypeMAX, so it is indexed by a pair of register type enum values.
 * Only declared here; the definition lives in another translation unit.
 */
extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
    169 
    170 
    171 /*
    172  * Returns "true" if the flags indicate that this address holds the start
    173  * of an instruction.
    174  */
    175 INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
    176     return (insnFlags[addr] & kInsnFlagWidthMask) != 0;
    177 }
    178 
    179 /*
    180  * Extract the unsigned 16-bit instruction width from "flags".
    181  */
    182 INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
    183     return insnFlags[addr] & kInsnFlagWidthMask;
    184 }
    185 
    186 /*
    187  * Changed?
    188  */
    189 INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
    190     return (insnFlags[addr] & kInsnFlagChanged) != 0;
    191 }
    192 INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
    193 {
    194     if (changed)
    195         insnFlags[addr] |= kInsnFlagChanged;
    196     else
    197         insnFlags[addr] &= ~kInsnFlagChanged;
    198 }
    199 
    200 /*
    201  * Visited?
    202  */
    203 INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
    204     return (insnFlags[addr] & kInsnFlagVisited) != 0;
    205 }
    206 INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool changed)
    207 {
    208     if (changed)
    209         insnFlags[addr] |= kInsnFlagVisited;
    210     else
    211         insnFlags[addr] &= ~kInsnFlagVisited;
    212 }
    213 
    214 /*
    215  * Visited or changed?
    216  */
    217 INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
    218     return (insnFlags[addr] & (kInsnFlagVisited|kInsnFlagChanged)) != 0;
    219 }
    220 
    221 /*
    222  * In a "try" block?
    223  */
    224 INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
    225     return (insnFlags[addr] & kInsnFlagInTry) != 0;
    226 }
    227 INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
    228 {
    229     assert(inTry);
    230     //if (inTry)
    231         insnFlags[addr] |= kInsnFlagInTry;
    232     //else
    233     //    insnFlags[addr] &= ~kInsnFlagInTry;
    234 }
    235 
    236 /*
    237  * Instruction is a branch target or exception handler?
    238  */
    239 INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
    240     return (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
    241 }
    242 INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
    243     bool isBranch)
    244 {
    245     assert(isBranch);
    246     //if (isBranch)
    247         insnFlags[addr] |= kInsnFlagBranchTarget;
    248     //else
    249     //    insnFlags[addr] &= ~kInsnFlagBranchTarget;
    250 }
    251 
    252 /*
    253  * Instruction is a GC point?
    254  */
    255 INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
    256     return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
    257 }
    258 INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
    259     bool isGcPoint)
    260 {
    261     assert(isGcPoint);
    262     //if (isGcPoint)
    263         insnFlags[addr] |= kInsnFlagGcPoint;
    264     //else
    265     //    insnFlags[addr] &= ~kInsnFlagGcPoint;
    266 }
    267 
    268 
/*
 * Create a new UninitInstanceMap.
 *
 * "meth" is the method being verified and "insnFlags" its per-code-unit
 * flags array.  NOTE(review): "newInstanceCount" is presumably the number
 * of new-instance instructions in the method -- confirm against the
 * implementation, along with what is returned on failure.
 *
 * Release the result with dvmFreeUninitInstanceMap().
 */
UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
    const InsnFlags* insnFlags, int newInstanceCount);
    274 
/*
 * Release the storage associated with an UninitInstanceMap, such as one
 * returned by dvmCreateUninitInstanceMap().
 */
void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
    279 
    280 /*
    281  * Associate a class with an address.  Returns the map slot index, or -1
    282  * if the address isn't listed in the map (shouldn't happen) or if a
    283  * different class is already associated with the address (shouldn't
    284  * happen either).
    285  */
    286 //int dvmSetUninitInstance(UninitInstanceMap* uninitMap, int addr,
    287 //    ClassObject* clazz);
    288 
    289 /*
    290  * Return the class associated with an uninitialized reference.  Pass in
    291  * the map index.
    292  */
    293 //ClassObject* dvmGetUninitInstance(const UninitInstanceMap* uninitMap, int idx);
    294 
    295 /*
    296  * Clear the class associated with an uninitialized reference.  Pass in
    297  * the map index.
    298  */
    299 //void dvmClearUninitInstance(UninitInstanceMap* uninitMap, int idx);
    300 
    301 
/*
 * Verify the bytecode of the method described by "vdata" (vdata->method).
 *
 * NOTE(review): this comment used to refer to parameters named "meth" and
 * "insnFlags", which the signature no longer has.  The requirement that
 * the flags be populated with instruction widths and "in try" flags
 * presumably now applies to vdata->insnFlags -- confirm against callers.
 *
 * Returns a bool; presumably "true" when verification succeeds -- confirm.
 */
bool dvmVerifyCodeFlow(VerifierData* vdata);
    307 
    308 #endif /*_DALVIK_CODEVERIFY*/
    309