Home | History | Annotate | Download | only in analysis
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Verifier basic block functions.
     19  */
     20 #include "Dalvik.h"
     21 #include "analysis/VfyBasicBlock.h"
     22 #include "analysis/CodeVerify.h"
     23 #include "analysis/VerifySubs.h"
     24 #include "libdex/DexCatch.h"
     25 #include "libdex/InstrUtils.h"
     26 
     27 
     28 /*
     29  * Extract the list of catch handlers from "pTry" into "addrBuf".
     30  *
     31  * Returns the size of the catch handler list.  If the return value
     32  * exceeds "addrBufSize", the items at the end of the list will not be
     33  * represented in the output array, and this function should be called
     34  * again with a larger buffer.
     35  */
     36 static u4 extractCatchHandlers(const DexCode* pCode, const DexTry* pTry,
     37     u4* addrBuf, size_t addrBufSize)
     38 {
     39     DexCatchIterator iterator;
     40     unsigned int idx = 0;
     41 
     42     dexCatchIteratorInit(&iterator, pCode, pTry->handlerOff);
     43     while (true) {
     44         DexCatchHandler* handler = dexCatchIteratorNext(&iterator);
     45         if (handler == NULL)
     46             break;
     47 
     48         if (idx < addrBufSize) {
     49             addrBuf[idx] = handler->address;
     50         }
     51         idx++;
     52     }
     53 
     54     return idx;
     55 }
     56 
     57 /*
     58  * Returns "true" if the instruction represents a data chunk, such as a
     59  * switch statement block.
     60  */
     61 static bool isDataChunk(u2 insn)
     62 {
     63     return (insn == kPackedSwitchSignature ||
     64             insn == kSparseSwitchSignature ||
     65             insn == kArrayDataSignature);
     66 }
     67 
     68 /*
     69  * Alloc a basic block in the specified slot.  The storage will be
     70  * initialized.
     71  */
     72 static VfyBasicBlock* allocVfyBasicBlock(VerifierData* vdata, u4 idx)
     73 {
     74     VfyBasicBlock* newBlock = (VfyBasicBlock*) calloc(1, sizeof(VfyBasicBlock));
     75     if (newBlock == NULL)
     76         return NULL;
     77 
     78     /*
     79      * TODO: there is no good default size here -- the problem is that most
     80      * addresses will only have one predecessor, but a fair number will
     81      * have 10+, and a few will have 100+ (e.g. the synthetic "finally"
     82      * in a large synchronized method).  We probably want to use a small
     83      * base allocation (perhaps two) and then have the first overflow
     84      * allocation jump dramatically (to 32 or thereabouts).
     85      */
     86     newBlock->predecessors = dvmPointerSetAlloc(32);
     87     if (newBlock->predecessors == NULL) {
     88         free(newBlock);
     89         return NULL;
     90     }
     91 
     92     newBlock->firstAddr = (u4) -1;      // DEBUG
     93 
     94     newBlock->liveRegs = dvmAllocBitVector(vdata->insnRegCount, false);
     95     if (newBlock->liveRegs == NULL) {
     96         dvmPointerSetFree(newBlock->predecessors);
     97         free(newBlock);
     98         return NULL;
     99     }
    100 
    101     return newBlock;
    102 }
    103 
    104 /*
    105  * Add "curBlock" to the predecessor list in "targetIdx".
    106  */
    107 static bool addToPredecessor(VerifierData* vdata, VfyBasicBlock* curBlock,
    108     u4 targetIdx)
    109 {
    110     assert(targetIdx < vdata->insnsSize);
    111 
    112     /*
    113      * Allocate the target basic block if necessary.  This will happen
    114      * on e.g. forward branches.
    115      *
    116      * We can't fill in all the fields, but that will happen automatically
    117      * when we get to that part of the code.
    118      */
    119     VfyBasicBlock* targetBlock = vdata->basicBlocks[targetIdx];
    120     if (targetBlock == NULL) {
    121         targetBlock = allocVfyBasicBlock(vdata, targetIdx);
    122         if (targetBlock == NULL)
    123             return false;
    124         vdata->basicBlocks[targetIdx] = targetBlock;
    125     }
    126 
    127     PointerSet* preds = targetBlock->predecessors;
    128     bool added = dvmPointerSetAddEntry(preds, curBlock);
    129     if (!added) {
    130         /*
    131          * This happens sometimes for packed-switch instructions, where
    132          * the same target address appears more than once.  Also, a
    133          * (pointless) conditional branch to the next instruction will
    134          * trip over this.
    135          */
    136         ALOGV("ODD: point set for targ=0x%04x (%p) already had block "
    137              "fir=0x%04x (%p)",
    138             targetIdx, targetBlock, curBlock->firstAddr, curBlock);
    139     }
    140 
    141     return true;
    142 }
    143 
    144 /*
    145  * Add ourselves to the predecessor list in all blocks we might transfer
    146  * control to.
    147  *
    148  * There are four ways to proceed to a new instruction:
    149  *  (1) continue to the following instruction
    150  *  (2) [un]conditionally branch to a specific location
    151  *  (3) conditionally branch through a "switch" statement
    152  *  (4) throw an exception
    153  *
    154  * Returning from the method (via a return statement or an uncaught
    155  * exception) are not interesting for liveness analysis.
    156  */
    157 static bool setPredecessors(VerifierData* vdata, VfyBasicBlock* curBlock,
    158     u4 curIdx, OpcodeFlags opFlags, u4 nextIdx, u4* handlerList,
    159     size_t numHandlers)
    160 {
    161     const InsnFlags* insnFlags = vdata->insnFlags;
    162     const Method* meth = vdata->method;
    163 
    164     unsigned int handlerIdx;
    165     for (handlerIdx = 0; handlerIdx < numHandlers; handlerIdx++) {
    166         if (!addToPredecessor(vdata, curBlock, handlerList[handlerIdx]))
    167             return false;
    168     }
    169 
    170     if ((opFlags & kInstrCanContinue) != 0) {
    171         if (!addToPredecessor(vdata, curBlock, nextIdx))
    172             return false;
    173     }
    174     if ((opFlags & kInstrCanBranch) != 0) {
    175         bool unused, gotBranch;
    176         s4 branchOffset, absOffset;
    177 
    178         gotBranch = dvmGetBranchOffset(meth, insnFlags, curIdx,
    179                 &branchOffset, &unused);
    180         assert(gotBranch);
    181         absOffset = curIdx + branchOffset;
    182         assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);
    183 
    184         if (!addToPredecessor(vdata, curBlock, absOffset))
    185             return false;
    186     }
    187 
    188     if ((opFlags & kInstrCanSwitch) != 0) {
    189         const u2* curInsn = &meth->insns[curIdx];
    190         const u2* dataPtr;
    191 
    192         /* these values have already been verified, so we can trust them */
    193         s4 offsetToData = curInsn[1] | ((s4) curInsn[2]) << 16;
    194         dataPtr = curInsn + offsetToData;
    195 
    196         /*
    197          * dataPtr points to the start of the switch data.  The first
    198          * item is the NOP+magic, the second is the number of entries in
    199          * the switch table.
    200          */
    201         u2 switchCount = dataPtr[1];
    202 
    203         /*
    204          * Skip past the ident field, size field, and the first_key field
    205          * (for packed) or the key list (for sparse).
    206          */
    207         if (dexOpcodeFromCodeUnit(meth->insns[curIdx]) == OP_PACKED_SWITCH) {
    208             dataPtr += 4;
    209         } else {
    210             assert(dexOpcodeFromCodeUnit(meth->insns[curIdx]) ==
    211                     OP_SPARSE_SWITCH);
    212             dataPtr += 2 + 2 * switchCount;
    213         }
    214 
    215         u4 switchIdx;
    216         for (switchIdx = 0; switchIdx < switchCount; switchIdx++) {
    217             s4 offset, absOffset;
    218 
    219             offset = (s4) dataPtr[switchIdx*2] |
    220                      (s4) (dataPtr[switchIdx*2 +1] << 16);
    221             absOffset = curIdx + offset;
    222             assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);
    223 
    224             if (!addToPredecessor(vdata, curBlock, absOffset))
    225                 return false;
    226         }
    227     }
    228 
    229     if (false) {
    230         if (dvmPointerSetGetCount(curBlock->predecessors) > 256) {
    231             ALOGI("Lots of preds at 0x%04x in %s.%s:%s", curIdx,
    232                 meth->clazz->descriptor, meth->name, meth->shorty);
    233         }
    234     }
    235 
    236     return true;
    237 }
    238 
    239 /*
    240  * Dump the contents of the basic blocks.
    241  */
    242 static void dumpBasicBlocks(const VerifierData* vdata)
    243 {
    244     char printBuf[256];
    245     unsigned int idx;
    246     int count;
    247 
    248     ALOGI("Basic blocks for %s.%s:%s", vdata->method->clazz->descriptor,
    249         vdata->method->name, vdata->method->shorty);
    250     for (idx = 0; idx < vdata->insnsSize; idx++) {
    251         VfyBasicBlock* block = vdata->basicBlocks[idx];
    252         if (block == NULL)
    253             continue;
    254 
    255         assert(block->firstAddr == idx);
    256         count = snprintf(printBuf, sizeof(printBuf), " %04x-%04x ",
    257             block->firstAddr, block->lastAddr);
    258 
    259         PointerSet* preds = block->predecessors;
    260         size_t numPreds = dvmPointerSetGetCount(preds);
    261 
    262         if (numPreds > 0) {
    263             count += snprintf(printBuf + count, sizeof(printBuf) - count,
    264                     "preds:");
    265 
    266             unsigned int predIdx;
    267             for (predIdx = 0; predIdx < numPreds; predIdx++) {
    268                 if (count >= (int) sizeof(printBuf))
    269                     break;
    270                 const VfyBasicBlock* pred =
    271                     (const VfyBasicBlock*) dvmPointerSetGetEntry(preds, predIdx);
    272                 count += snprintf(printBuf + count, sizeof(printBuf) - count,
    273                         "%04x(%p),", pred->firstAddr, pred);
    274             }
    275         } else {
    276             count += snprintf(printBuf + count, sizeof(printBuf) - count,
    277                     "(no preds)");
    278         }
    279 
    280         printBuf[sizeof(printBuf)-2] = '!';
    281         printBuf[sizeof(printBuf)-1] = '\0';
    282 
    283         ALOGI("%s", printBuf);
    284     }
    285 
    286     usleep(100 * 1000);      /* ugh...let logcat catch up */
    287 }
    288 
    289 
    290 /*
    291  * Generate a list of basic blocks and related information.
    292  *
    293  * On success, returns "true" with vdata->basicBlocks initialized.
    294  */
    295 bool dvmComputeVfyBasicBlocks(VerifierData* vdata)
    296 {
    297     const InsnFlags* insnFlags = vdata->insnFlags;
    298     const Method* meth = vdata->method;
    299     const u4 insnsSize = vdata->insnsSize;
    300     const DexCode* pCode = dvmGetMethodCode(meth);
    301     const DexTry* pTries = NULL;
    302     const size_t kHandlerStackAllocSize = 16;   /* max seen so far is 7 */
    303     u4 handlerAddrs[kHandlerStackAllocSize];
    304     u4* handlerListAlloc = NULL;
    305     u4* handlerList = NULL;
    306     size_t numHandlers = 0;
    307     u4 idx, blockStartAddr;
    308     bool result = false;
    309 
    310     bool verbose = false; //dvmWantVerboseVerification(meth);
    311     if (verbose) {
    312         ALOGI("Basic blocks for %s.%s:%s",
    313             meth->clazz->descriptor, meth->name, meth->shorty);
    314     }
    315 
    316     /*
    317      * Allocate a data structure that allows us to map from an address to
    318      * the corresponding basic block.  Initially all pointers are NULL.
    319      * They are populated on demand as we proceed (either when we reach a
    320      * new BB, or when we need to add an item to the predecessor list in
    321      * a not-yet-reached BB).
    322      *
    323      * Only the first instruction in the block points to the BB structure;
    324      * the rest remain NULL.
    325      */
    326     vdata->basicBlocks =
    327         (VfyBasicBlock**) calloc(insnsSize, sizeof(VfyBasicBlock*));
    328     if (vdata->basicBlocks == NULL)
    329       return false;
    330 
    331     /*
    332      * The "tries" list is a series of non-overlapping regions with a list
    333      * of "catch" handlers.  Rather than do the "find a matching try block"
    334      * computation at each step, we just walk the "try" list in parallel.
    335      *
    336      * Not all methods have "try" blocks.  If this one does, we init tryEnd
    337      * to zero, so that the (exclusive bound) range check trips immediately.
    338      */
    339     u4 tryIndex = 0, tryStart = 0, tryEnd = 0;
    340     if (pCode->triesSize != 0) {
    341         pTries = dexGetTries(pCode);
    342     }
    343 
    344     u4 debugBBIndex = 0;
    345 
    346     /*
    347      * The address associated with a basic block is the start address.
    348      */
    349     blockStartAddr = 0;
    350 
    351     for (idx = 0; idx < insnsSize; ) {
    352         /*
    353          * Make sure we're pointing at the right "try" block.  It should
    354          * not be possible to "jump over" a block, so if we're no longer
    355          * in the correct one we can just advance to the next.
    356          */
    357         if (pTries != NULL && idx >= tryEnd) {
    358             if (tryIndex == pCode->triesSize) {
    359                 /* no more try blocks in this method */
    360                 pTries = NULL;
    361                 numHandlers = 0;
    362             } else {
    363                 /*
    364                  * Extract the set of handlers.  We want to avoid doing
    365                  * this for each block, so we copy them to local storage.
    366                  * If it doesn't fit in the small stack area, we'll use
    367                  * the heap instead.
    368                  *
    369                  * It's rare to encounter a method with more than half a
    370                  * dozen possible handlers.
    371                  */
    372                 tryStart = pTries[tryIndex].startAddr;
    373                 tryEnd = tryStart + pTries[tryIndex].insnCount;
    374 
    375                 if (handlerListAlloc != NULL) {
    376                     free(handlerListAlloc);
    377                     handlerListAlloc = NULL;
    378                 }
    379                 numHandlers = extractCatchHandlers(pCode, &pTries[tryIndex],
    380                     handlerAddrs, kHandlerStackAllocSize);
    381                 assert(numHandlers > 0);    // TODO make sure this is verified
    382                 if (numHandlers <= kHandlerStackAllocSize) {
    383                     handlerList = handlerAddrs;
    384                 } else {
    385                     ALOGD("overflow, numHandlers=%d", numHandlers);
    386                     handlerListAlloc = (u4*) malloc(sizeof(u4) * numHandlers);
    387                     if (handlerListAlloc == NULL)
    388                         return false;
    389                     extractCatchHandlers(pCode, &pTries[tryIndex],
    390                         handlerListAlloc, numHandlers);
    391                     handlerList = handlerListAlloc;
    392                 }
    393 
    394                 ALOGV("+++ start=%x end=%x numHan=%d",
    395                     tryStart, tryEnd, numHandlers);
    396 
    397                 tryIndex++;
    398             }
    399         }
    400 
    401         /*
    402          * Check the current instruction, and possibly aspects of the
    403          * next instruction, to see if this instruction ends the current
    404          * basic block.
    405          *
    406          * Instructions that can throw only end the block if there is the
    407          * possibility of a local handler catching the exception.
    408          */
    409         Opcode opcode = dexOpcodeFromCodeUnit(meth->insns[idx]);
    410         OpcodeFlags opFlags = dexGetFlagsFromOpcode(opcode);
    411         size_t nextIdx = idx + dexGetWidthFromInstruction(&meth->insns[idx]);
    412         bool endBB = false;
    413         bool ignoreInstr = false;
    414 
    415         if ((opFlags & kInstrCanContinue) == 0) {
    416             /* does not continue */
    417             endBB = true;
    418         } else if ((opFlags & (kInstrCanBranch | kInstrCanSwitch)) != 0) {
    419             /* conditionally branches elsewhere */
    420             endBB = true;
    421         } else if ((opFlags & kInstrCanThrow) != 0 &&
    422                 dvmInsnIsInTry(insnFlags, idx))
    423         {
    424             /* throws an exception that might be caught locally */
    425             endBB = true;
    426         } else if (isDataChunk(meth->insns[idx])) {
    427             /*
    428              * If this is a data chunk (e.g. switch data) we want to skip
    429              * over it entirely.  Set endBB so we don't carry this along as
    430              * the start of a block, and ignoreInstr so we don't try to
    431              * open a basic block for this instruction.
    432              */
    433             endBB = ignoreInstr = true;
    434         } else if (dvmInsnIsBranchTarget(insnFlags, nextIdx)) {
    435             /*
    436              * We also need to end it if the next instruction is a branch
    437              * target.  Note we've tagged exception catch blocks as such.
    438              *
    439              * If we're this far along in the "else" chain, we know that
    440              * this isn't a data-chunk NOP, and control can continue to
    441              * the next instruction, so we're okay examining "nextIdx".
    442              */
    443             assert(nextIdx < insnsSize);
    444             endBB = true;
    445         } else if (opcode == OP_NOP && isDataChunk(meth->insns[nextIdx])) {
    446             /*
    447              * Handle an odd special case: if this is NOP padding before a
    448              * data chunk, also treat it as "ignore".  Otherwise it'll look
    449              * like a block that starts and doesn't end.
    450              */
    451             endBB = ignoreInstr = true;
    452         } else {
    453             /* check: return ops should be caught by absence of can-continue */
    454             assert((opFlags & kInstrCanReturn) == 0);
    455         }
    456 
    457         if (verbose) {
    458             char btc = dvmInsnIsBranchTarget(insnFlags, idx) ? '>' : ' ';
    459             char tryc =
    460                 (pTries != NULL && idx >= tryStart && idx < tryEnd) ? 't' : ' ';
    461             bool startBB = (idx == blockStartAddr);
    462             const char* startEnd;
    463 
    464 
    465             if (ignoreInstr)
    466                 startEnd = "IGNORE";
    467             else if (startBB && endBB)
    468                 startEnd = "START/END";
    469             else if (startBB)
    470                 startEnd = "START";
    471             else if (endBB)
    472                 startEnd = "END";
    473             else
    474                 startEnd = "-";
    475 
    476             ALOGI("%04x: %c%c%s #%d", idx, tryc, btc, startEnd, debugBBIndex);
    477 
    478             if (pTries != NULL && idx == tryStart) {
    479                 assert(numHandlers > 0);
    480                 ALOGI("  EXC block: [%04x, %04x) %d:(%04x...)",
    481                     tryStart, tryEnd, numHandlers, handlerList[0]);
    482             }
    483         }
    484 
    485         if (idx != blockStartAddr) {
    486             /* should not be a basic block struct associated with this addr */
    487             assert(vdata->basicBlocks[idx] == NULL);
    488         }
    489         if (endBB) {
    490             if (!ignoreInstr) {
    491                 /*
    492                  * Create a new BB if one doesn't already exist.
    493                  */
    494                 VfyBasicBlock* curBlock = vdata->basicBlocks[blockStartAddr];
    495                 if (curBlock == NULL) {
    496                     curBlock = allocVfyBasicBlock(vdata, blockStartAddr);
    497                     if (curBlock == NULL)
    498                         return false;
    499                     vdata->basicBlocks[blockStartAddr] = curBlock;
    500                 }
    501 
    502                 curBlock->firstAddr = blockStartAddr;
    503                 curBlock->lastAddr = idx;
    504 
    505                 if (!setPredecessors(vdata, curBlock, idx, opFlags, nextIdx,
    506                         handlerList, numHandlers))
    507                 {
    508                     goto bail;
    509                 }
    510             }
    511 
    512             blockStartAddr = nextIdx;
    513             debugBBIndex++;
    514         }
    515 
    516         idx = nextIdx;
    517     }
    518 
    519     assert(idx == insnsSize);
    520 
    521     result = true;
    522 
    523     if (verbose)
    524         dumpBasicBlocks(vdata);
    525 
    526 bail:
    527     free(handlerListAlloc);
    528     return result;
    529 }
    530 
    531 /*
    532  * Free the storage used by basic blocks.
    533  */
    534 void dvmFreeVfyBasicBlocks(VerifierData* vdata)
    535 {
    536     unsigned int idx;
    537 
    538     if (vdata->basicBlocks == NULL)
    539         return;
    540 
    541     for (idx = 0; idx < vdata->insnsSize; idx++) {
    542         VfyBasicBlock* block = vdata->basicBlocks[idx];
    543         if (block == NULL)
    544             continue;
    545 
    546         dvmPointerSetFree(block->predecessors);
    547         dvmFreeBitVector(block->liveRegs);
    548         free(block);
    549     }
    550 
    551     free(vdata->basicBlocks);
    552 }
    553