Home | History | Annotate | Download | only in analysis
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Dalvik bytecode verifier.
     19  */
     20 #ifndef DALVIK_CODEVERIFY_H_
     21 #define DALVIK_CODEVERIFY_H_
     22 
     23 #include "analysis/VerifySubs.h"
     24 #include "analysis/VfyBasicBlock.h"
     25 
     26 /*
     27  * Enumeration for register type values.  The "hi" piece of a 64-bit value
     28  * MUST immediately follow the "lo" piece in the enumeration, so we can check
     29  * that hi==lo+1.
     30  *
     31  * Assignment of constants:
     32  *   [-MAXINT,-32768)   : integer
     33  *   [-32768,-128)      : short
     34  *   [-128,0)           : byte
     35  *   0                  : zero
     36  *   1                  : one
     37  *   [2,128)            : posbyte
     38  *   [128,32768)        : posshort
     39  *   [32768,65536)      : char
     40  *   [65536,MAXINT]     : integer
     41  *
     42  * Allowed "implicit" widening conversions:
     43  *   zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
     44  *   one -> boolean, posbyte, byte, posshort, short, char, integer
     45  *   boolean -> posbyte, byte, posshort, short, char, integer
     46  *   posbyte -> posshort, short, integer, char
     47  *   byte -> short, integer
     48  *   posshort -> integer, char
     49  *   short -> integer
     50  *   char -> integer
     51  *
     52  * In addition, all of the above can convert to "float".
     53  *
     54  * We're more careful with integer values than the spec requires.  The
     55  * motivation is to restrict byte/char/short to the correct range of values.
     56  * For example, if a method takes a byte argument, we don't want to allow
     57  * the code to load the constant "1024" and pass it in.
     58  */
     59 enum {
     60     kRegTypeUnknown = 0,    /* initial state; use value=0 so calloc works */
     61     kRegTypeUninit = 1,     /* MUST be odd to distinguish from pointer */
     62     kRegTypeConflict,       /* merge clash makes this reg's type unknowable */
     63 
     64     /*
     65      * Category-1nr types.  The order of these is chiseled into a couple
     66      * of tables, so don't add, remove, or reorder if you can avoid it.
     67      */
     68 #define kRegType1nrSTART    kRegTypeZero
     69     kRegTypeZero,           /* 32-bit 0, could be Boolean, Int, Float, or Ref */
     70     kRegTypeOne,            /* 32-bit 1, could be Boolean, Int, Float */
     71     kRegTypeBoolean,        /* must be 0 or 1 */
     72     kRegTypeConstPosByte,   /* const derived byte, known positive */
     73     kRegTypeConstByte,      /* const derived byte */
     74     kRegTypeConstPosShort,  /* const derived short, known positive */
     75     kRegTypeConstShort,     /* const derived short */
     76     kRegTypeConstChar,      /* const derived char */
     77     kRegTypeConstInteger,   /* const derived integer */
     78     kRegTypePosByte,        /* byte, known positive (can become char) */
     79     kRegTypeByte,
     80     kRegTypePosShort,       /* short, known positive (can become char) */
     81     kRegTypeShort,
     82     kRegTypeChar,
     83     kRegTypeInteger,
     84     kRegTypeFloat,
     85 #define kRegType1nrEND      kRegTypeFloat
     86 
     87     kRegTypeConstLo,        /* const derived wide, lower half */
     88     kRegTypeConstHi,        /* const derived wide, upper half */
     89     kRegTypeLongLo,         /* lower-numbered register; endian-independent */
     90     kRegTypeLongHi,
     91     kRegTypeDoubleLo,
     92     kRegTypeDoubleHi,
     93 
     94     /*
     95      * Enumeration max; this is used with "full" (32-bit) RegType values.
     96      *
     97      * Anything larger than this is a ClassObject or uninit ref.  Mask off
     98      * all but the low 8 bits; if you're left with kRegTypeUninit, pull
     99      * the uninit index out of the high 24.  Because kRegTypeUninit has an
    100      * odd value, there is no risk of a particular ClassObject pointer bit
    101      * pattern being confused for it (assuming our class object allocator
    102      * uses word alignment).
    103      */
    104     kRegTypeMAX
    105 };
    106 #define kRegTypeUninitMask  0xff
    107 #define kRegTypeUninitShift 8
    108 
    109 /*
    110  * RegType holds information about the type of data held in a register.
    111  * For most types it's a simple enum.  For reference types it holds a
    112  * pointer to the ClassObject, and for uninitialized references it holds
    113  * an index into the UninitInstanceMap.
    114  */
    115 typedef u4 RegType;
    116 
    117 /*
    118  * A bit vector indicating which entries in the monitor stack are
    119  * associated with this register.  The low bit corresponds to the stack's
    120  * bottom-most entry.
    121  */
    122 typedef u4 MonitorEntries;
    123 #define kMaxMonitorStackDepth   (sizeof(MonitorEntries) * 8)
    124 
    125 /*
    126  * During verification, we associate one of these with every "interesting"
    127  * instruction.  We track the status of all registers, and (if the method
    128  * has any monitor-enter instructions) maintain a stack of entered monitors
    129  * (identified by code unit offset).
    130  *
    131  * If live-precise register maps are enabled, the "liveRegs" vector will
    132  * be populated.  Unlike the other lists of registers here, we do not
    133  * track the liveness of the method result register (which is not visible
    134  * to the GC).
    135  */
    136 struct RegisterLine {
    137     RegType*        regTypes;
    138     MonitorEntries* monitorEntries;
    139     u4*             monitorStack;
    140     unsigned int    monitorStackTop;
    141     BitVector*      liveRegs;
    142 };
    143 
    144 /*
    145  * Table that maps uninitialized instances to classes, based on the
    146  * address of the new-instance instruction.  One per method.
    147  */
    148 struct UninitInstanceMap {
    149     int numEntries;
    150     struct {
    151         int             addr;   /* code offset, or -1 for method arg ("this") */
    152         ClassObject*    clazz;  /* class created at this address */
    153     } map[1];
    154 };
    155 #define kUninitThisArgAddr  (-1)
    156 #define kUninitThisArgSlot  0
    157 
    158 /*
    159  * Various bits of data used by the verifier and register map generator.
    160  */
    161 struct VerifierData {
    162     /*
    163      * The method we're working on.
    164      */
    165     const Method*   method;
    166 
    167     /*
    168      * Number of code units of instructions in the method.  A cache of the
    169      * value calculated by dvmGetMethodInsnsSize().
    170      */
    171     u4              insnsSize;
    172 
    173     /*
    174      * Number of registers we track for each instruction.  This is equal
    175      * to the method's declared "registersSize".  (Does not include the
    176      * pending return value.)
    177      */
    178     u4              insnRegCount;
    179 
    180     /*
    181      * Instruction widths and flags, one entry per code unit.
    182      */
    183     InsnFlags*      insnFlags;
    184 
    185     /*
    186      * Uninitialized instance map, used for tracking the movement of
    187      * objects that have been allocated but not initialized.
    188      */
    189     UninitInstanceMap* uninitMap;
    190 
    191     /*
    192      * Array of RegisterLine structs, one entry per code unit.  We only need
    193      * entries for code units that hold the start of an "interesting"
    194      * instruction.  For register map generation, we're only interested
    195      * in GC points.
    196      */
    197     RegisterLine*   registerLines;
    198 
    199     /*
    200      * The number of occurrences of specific opcodes.
    201      */
    202     size_t          newInstanceCount;
    203     size_t          monitorEnterCount;
    204 
    205     /*
    206      * Array of pointers to basic blocks, one entry per code unit.  Used
    207      * for liveness analysis.
    208      */
    209     VfyBasicBlock** basicBlocks;
    210 };
    211 
    212 
    213 /* table with static merge logic for primitive types */
    214 extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
    215 
    216 
    217 /*
    218  * Returns "true" if the flags indicate that this address holds the start
    219  * of an instruction.
    220  */
    221 INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
    222     return (insnFlags[addr] & kInsnFlagWidthMask) != 0;
    223 }
    224 
    225 /*
    226  * Extract the unsigned 16-bit instruction width from "flags".
    227  */
    228 INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
    229     return insnFlags[addr] & kInsnFlagWidthMask;
    230 }
    231 
    232 /*
    233  * Changed?
    234  */
    235 INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
    236     return (insnFlags[addr] & kInsnFlagChanged) != 0;
    237 }
    238 INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
    239 {
    240     if (changed)
    241         insnFlags[addr] |= kInsnFlagChanged;
    242     else
    243         insnFlags[addr] &= ~kInsnFlagChanged;
    244 }
    245 
    246 /*
    247  * Visited?
    248  */
    249 INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
    250     return (insnFlags[addr] & kInsnFlagVisited) != 0;
    251 }
    252 INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool changed)
    253 {
    254     if (changed)
    255         insnFlags[addr] |= kInsnFlagVisited;
    256     else
    257         insnFlags[addr] &= ~kInsnFlagVisited;
    258 }
    259 
    260 /*
    261  * Visited or changed?
    262  */
    263 INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
    264     return (insnFlags[addr] & (kInsnFlagVisited|kInsnFlagChanged)) != 0;
    265 }
    266 
    267 /*
    268  * In a "try" block?
    269  */
    270 INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
    271     return (insnFlags[addr] & kInsnFlagInTry) != 0;
    272 }
    273 INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
    274 {
    275     assert(inTry);
    276     //if (inTry)
    277         insnFlags[addr] |= kInsnFlagInTry;
    278     //else
    279     //    insnFlags[addr] &= ~kInsnFlagInTry;
    280 }
    281 
    282 /*
    283  * Instruction is a branch target or exception handler?
    284  */
    285 INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
    286     return (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
    287 }
    288 INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
    289     bool isBranch)
    290 {
    291     assert(isBranch);
    292     //if (isBranch)
    293         insnFlags[addr] |= kInsnFlagBranchTarget;
    294     //else
    295     //    insnFlags[addr] &= ~kInsnFlagBranchTarget;
    296 }
    297 
    298 /*
    299  * Instruction is a GC point?
    300  */
    301 INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
    302     return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
    303 }
    304 INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
    305     bool isGcPoint)
    306 {
    307     assert(isGcPoint);
    308     //if (isGcPoint)
    309         insnFlags[addr] |= kInsnFlagGcPoint;
    310     //else
    311     //    insnFlags[addr] &= ~kInsnFlagGcPoint;
    312 }
    313 
    314 
    315 /*
    316  * Create a new UninitInstanceMap.
    317  */
    318 UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
    319     const InsnFlags* insnFlags, int newInstanceCount);
    320 
    321 /*
    322  * Release the storage associated with an UninitInstanceMap.
    323  */
    324 void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
    325 
    326 /*
    327  * Verify bytecode in "meth".  "insnFlags" should be populated with
    328  * instruction widths and "in try" flags.
    329  */
    330 bool dvmVerifyCodeFlow(VerifierData* vdata);
    331 
    332 #endif  // DALVIK_CODEVERIFY_H_
    333