Home | History | Annotate | Download | only in libdex
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Access the contents of a .dex file.
     19  */
     20 
     21 #include "DexFile.h"
     22 #include "DexOptData.h"
     23 #include "DexProto.h"
     24 #include "DexCatch.h"
     25 #include "Leb128.h"
     26 #include "sha1.h"
     27 #include "ZipArchive.h"
     28 
     29 #include <zlib.h>
     30 
     31 #include <stdlib.h>
     32 #include <stddef.h>
     33 #include <string.h>
     34 #include <fcntl.h>
     35 #include <errno.h>
     36 
     37 
     38 /*
     39  * Verifying checksums is good, but it slows things down and causes us to
     40  * touch every page.  In the "optimized" world, it doesn't work at all,
     41  * because we rewrite the contents.
     42  */
     43 static const bool kVerifySignature = false;
     44 
     45 /* (documented in header) */
     46 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
     47     const char* string = dexGetPrimitiveTypeDescriptor(type);
     48 
     49     return (string == NULL) ? '\0' : string[0];
     50 }
     51 
     52 /* (documented in header) */
     53 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
     54     switch (type) {
     55         case PRIM_VOID:    return "V";
     56         case PRIM_BOOLEAN: return "Z";
     57         case PRIM_BYTE:    return "B";
     58         case PRIM_SHORT:   return "S";
     59         case PRIM_CHAR:    return "C";
     60         case PRIM_INT:     return "I";
     61         case PRIM_LONG:    return "J";
     62         case PRIM_FLOAT:   return "F";
     63         case PRIM_DOUBLE:  return "D";
     64         default:           return NULL;
     65     }
     66 
     67     return NULL;
     68 }
     69 
     70 /* (documented in header) */
     71 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
     72     switch (type) {
     73         case PRIM_VOID:    return NULL;
     74         case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
     75         case PRIM_BYTE:    return "Ljava/lang/Byte;";
     76         case PRIM_SHORT:   return "Ljava/lang/Short;";
     77         case PRIM_CHAR:    return "Ljava/lang/Character;";
     78         case PRIM_INT:     return "Ljava/lang/Integer;";
     79         case PRIM_LONG:    return "Ljava/lang/Long;";
     80         case PRIM_FLOAT:   return "Ljava/lang/Float;";
     81         case PRIM_DOUBLE:  return "Ljava/lang/Double;";
     82         default:           return NULL;
     83     }
     84 }
     85 
     86 /* (documented in header) */
     87 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
     88     switch (descriptorChar) {
     89         case 'V': return PRIM_VOID;
     90         case 'Z': return PRIM_BOOLEAN;
     91         case 'B': return PRIM_BYTE;
     92         case 'S': return PRIM_SHORT;
     93         case 'C': return PRIM_CHAR;
     94         case 'I': return PRIM_INT;
     95         case 'J': return PRIM_LONG;
     96         case 'F': return PRIM_FLOAT;
     97         case 'D': return PRIM_DOUBLE;
     98         default:  return PRIM_NOT;
     99     }
    100 }
    101 
    102 /* Return the UTF-8 encoded string with the specified string_id index,
    103  * also filling in the UTF-16 size (number of 16-bit code points).*/
    104 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
    105         u4* utf16Size) {
    106     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
    107     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
    108 
    109     *utf16Size = readUnsignedLeb128(&ptr);
    110     return (const char*) ptr;
    111 }
    112 
    113 /*
    114  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
    115  * least kSHA1DigestOutputLen bytes.
    116  */
    117 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
    118 
    119 /*
    120  * Compute a SHA-1 digest on a range of bytes.
    121  */
    122 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
    123     unsigned char digest[])
    124 {
    125     SHA1_CTX context;
    126     SHA1Init(&context);
    127     SHA1Update(&context, data, length);
    128     SHA1Final(digest, &context);
    129 }
    130 
    131 /*
    132  * Format the SHA-1 digest into the buffer, which must be able to hold at
    133  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
    134  */
    135 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
    136 {
    137     static const char hexDigit[] = "0123456789abcdef";
    138     char* cp;
    139     int i;
    140 
    141     cp = tmpBuf;
    142     for (i = 0; i < kSHA1DigestLen; i++) {
    143         *cp++ = hexDigit[digest[i] >> 4];
    144         *cp++ = hexDigit[digest[i] & 0x0f];
    145     }
    146     *cp++ = '\0';
    147 
    148     assert(cp == tmpBuf + kSHA1DigestOutputLen);
    149 
    150     return tmpBuf;
    151 }
    152 
    153 /*
    154  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
    155  *
    156  * This may or may not be compatible with UTF-8 hash functions used inside
    157  * the Dalvik VM.
    158  *
    159  * The basic "multiply by 31 and add" approach does better on class names
    160  * than most other things tried (e.g. adler32).
    161  */
    162 static u4 classDescriptorHash(const char* str)
    163 {
    164     u4 hash = 1;
    165 
    166     while (*str != '\0')
    167         hash = hash * 31 + *str++;
    168 
    169     return hash;
    170 }
    171 
    172 /*
    173  * Add an entry to the class lookup table.  We hash the string and probe
    174  * until we find an open slot.
    175  */
    176 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
    177     int stringOff, int classDefOff, int* pNumProbes)
    178 {
    179     const char* classDescriptor =
    180         (const char*) (pDexFile->baseAddr + stringOff);
    181     u4 hash = classDescriptorHash(classDescriptor);
    182     int mask = pLookup->numEntries-1;
    183     int idx = hash & mask;
    184 
    185     /*
    186      * Find the first empty slot.  We oversized the table, so this is
    187      * guaranteed to finish.
    188      */
    189     int probes = 0;
    190     while (pLookup->table[idx].classDescriptorOffset != 0) {
    191         idx = (idx + 1) & mask;
    192         probes++;
    193     }
    194     //if (probes > 1)
    195     //    ALOGW("classLookupAdd: probes=%d", probes);
    196 
    197     pLookup->table[idx].classDescriptorHash = hash;
    198     pLookup->table[idx].classDescriptorOffset = stringOff;
    199     pLookup->table[idx].classDefOffset = classDefOff;
    200     *pNumProbes = probes;
    201 }
    202 
    203 /*
    204  * Create the class lookup hash table.
    205  *
    206  * Returns newly-allocated storage.
    207  */
    208 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
    209 {
    210     DexClassLookup* pLookup;
    211     int allocSize;
    212     int i, numEntries;
    213     int numProbes, totalProbes, maxProbes;
    214 
    215     numProbes = totalProbes = maxProbes = 0;
    216 
    217     assert(pDexFile != NULL);
    218 
    219     /*
    220      * Using a factor of 3 results in far less probing than a factor of 2,
    221      * but almost doubles the flash storage requirements for the bootstrap
    222      * DEX files.  The overall impact on class loading performance seems
    223      * to be minor.  We could probably get some performance improvement by
    224      * using a secondary hash.
    225      */
    226     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    227     allocSize = offsetof(DexClassLookup, table)
    228                     + numEntries * sizeof(pLookup->table[0]);
    229 
    230     pLookup = (DexClassLookup*) calloc(1, allocSize);
    231     if (pLookup == NULL)
    232         return NULL;
    233     pLookup->size = allocSize;
    234     pLookup->numEntries = numEntries;
    235 
    236     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
    237         const DexClassDef* pClassDef;
    238         const char* pString;
    239 
    240         pClassDef = dexGetClassDef(pDexFile, i);
    241         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
    242 
    243         classLookupAdd(pDexFile, pLookup,
    244             (u1*)pString - pDexFile->baseAddr,
    245             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
    246 
    247         if (numProbes > maxProbes)
    248             maxProbes = numProbes;
    249         totalProbes += numProbes;
    250     }
    251 
    252     ALOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
    253          " total=%d max=%d",
    254         pDexFile->pHeader->classDefsSize, numEntries,
    255         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
    256         allocSize, totalProbes, maxProbes);
    257 
    258     return pLookup;
    259 }
    260 
    261 
    262 /*
    263  * Set up the basic raw data pointers of a DexFile. This function isn't
    264  * meant for general use.
    265  */
    266 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    267     DexHeader *pHeader = (DexHeader*) data;
    268 
    269     pDexFile->baseAddr = data;
    270     pDexFile->pHeader = pHeader;
    271     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
    272     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
    273     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
    274     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
    275     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
    276     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
    277     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
    278 }
    279 
    280 /*
    281  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
    282  * called after the byte-ordering and structure alignment has been fixed up.
    283  *
    284  * On success, return a newly-allocated DexFile.
    285  */
    286 DexFile* dexFileParse(const u1* data, size_t length, int flags)
    287 {
    288     DexFile* pDexFile = NULL;
    289     const DexHeader* pHeader;
    290     const u1* magic;
    291     int result = -1;
    292 
    293     if (length < sizeof(DexHeader)) {
    294         ALOGE("too short to be a valid .dex");
    295         goto bail;      /* bad file format */
    296     }
    297 
    298     pDexFile = (DexFile*) malloc(sizeof(DexFile));
    299     if (pDexFile == NULL)
    300         goto bail;      /* alloc failure */
    301     memset(pDexFile, 0, sizeof(DexFile));
    302 
    303     /*
    304      * Peel off the optimized header.
    305      */
    306     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
    307         magic = data;
    308         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
    309             ALOGE("bad opt version (0x%02x %02x %02x %02x)",
    310                  magic[4], magic[5], magic[6], magic[7]);
    311             goto bail;
    312         }
    313 
    314         pDexFile->pOptHeader = (const DexOptHeader*) data;
    315         ALOGV("Good opt header, DEX offset is %d, flags=0x%02x",
    316             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
    317 
    318         /* parse the optimized dex file tables */
    319         if (!dexParseOptData(data, length, pDexFile))
    320             goto bail;
    321 
    322         /* ignore the opt header and appended data from here on out */
    323         data += pDexFile->pOptHeader->dexOffset;
    324         length -= pDexFile->pOptHeader->dexOffset;
    325         if (pDexFile->pOptHeader->dexLength > length) {
    326             ALOGE("File truncated? stored len=%d, rem len=%d",
    327                 pDexFile->pOptHeader->dexLength, (int) length);
    328             goto bail;
    329         }
    330         length = pDexFile->pOptHeader->dexLength;
    331     }
    332 
    333     dexFileSetupBasicPointers(pDexFile, data);
    334     pHeader = pDexFile->pHeader;
    335 
    336     if (!dexHasValidMagic(pHeader)) {
    337         goto bail;
    338     }
    339 
    340     /*
    341      * Verify the checksum(s).  This is reasonably quick, but does require
    342      * touching every byte in the DEX file.  The base checksum changes after
    343      * byte-swapping and DEX optimization.
    344      */
    345     if (flags & kDexParseVerifyChecksum) {
    346         u4 adler = dexComputeChecksum(pHeader);
    347         if (adler != pHeader->checksum) {
    348             ALOGE("ERROR: bad checksum (%08x vs %08x)",
    349                 adler, pHeader->checksum);
    350             if (!(flags & kDexParseContinueOnError))
    351                 goto bail;
    352         } else {
    353             ALOGV("+++ adler32 checksum (%08x) verified", adler);
    354         }
    355 
    356         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
    357         if (pOptHeader != NULL) {
    358             adler = dexComputeOptChecksum(pOptHeader);
    359             if (adler != pOptHeader->checksum) {
    360                 ALOGE("ERROR: bad opt checksum (%08x vs %08x)",
    361                     adler, pOptHeader->checksum);
    362                 if (!(flags & kDexParseContinueOnError))
    363                     goto bail;
    364             } else {
    365                 ALOGV("+++ adler32 opt checksum (%08x) verified", adler);
    366             }
    367         }
    368     }
    369 
    370     /*
    371      * Verify the SHA-1 digest.  (Normally we don't want to do this --
    372      * the digest is used to uniquely identify the original DEX file, and
    373      * can't be computed for verification after the DEX is byte-swapped
    374      * and optimized.)
    375      */
    376     if (kVerifySignature) {
    377         unsigned char sha1Digest[kSHA1DigestLen];
    378         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
    379                             kSHA1DigestLen;
    380 
    381         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
    382         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
    383             char tmpBuf1[kSHA1DigestOutputLen];
    384             char tmpBuf2[kSHA1DigestOutputLen];
    385             ALOGE("ERROR: bad SHA1 digest (%s vs %s)",
    386                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
    387                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
    388             if (!(flags & kDexParseContinueOnError))
    389                 goto bail;
    390         } else {
    391             ALOGV("+++ sha1 digest verified");
    392         }
    393     }
    394 
    395     if (pHeader->fileSize != length) {
    396         ALOGE("ERROR: stored file size (%d) != expected (%d)",
    397             (int) pHeader->fileSize, (int) length);
    398         if (!(flags & kDexParseContinueOnError))
    399             goto bail;
    400     }
    401 
    402     if (pHeader->classDefsSize == 0) {
    403         ALOGE("ERROR: DEX file has no classes in it, failing");
    404         goto bail;
    405     }
    406 
    407     /*
    408      * Success!
    409      */
    410     result = 0;
    411 
    412 bail:
    413     if (result != 0 && pDexFile != NULL) {
    414         dexFileFree(pDexFile);
    415         pDexFile = NULL;
    416     }
    417     return pDexFile;
    418 }
    419 
    420 /*
    421  * Free up the DexFile and any associated data structures.
    422  *
    423  * Note we may be called with a partially-initialized DexFile.
    424  */
    425 void dexFileFree(DexFile* pDexFile)
    426 {
    427     if (pDexFile == NULL)
    428         return;
    429 
    430     free(pDexFile);
    431 }
    432 
    433 /*
    434  * Look up a class definition entry by descriptor.
    435  *
    436  * "descriptor" should look like "Landroid/debug/Stuff;".
    437  */
    438 const DexClassDef* dexFindClass(const DexFile* pDexFile,
    439     const char* descriptor)
    440 {
    441     const DexClassLookup* pLookup = pDexFile->pClassLookup;
    442     u4 hash;
    443     int idx, mask;
    444 
    445     hash = classDescriptorHash(descriptor);
    446     mask = pLookup->numEntries - 1;
    447     idx = hash & mask;
    448 
    449     /*
    450      * Search until we find a matching entry or an empty slot.
    451      */
    452     while (true) {
    453         int offset;
    454 
    455         offset = pLookup->table[idx].classDescriptorOffset;
    456         if (offset == 0)
    457             return NULL;
    458 
    459         if (pLookup->table[idx].classDescriptorHash == hash) {
    460             const char* str;
    461 
    462             str = (const char*) (pDexFile->baseAddr + offset);
    463             if (strcmp(str, descriptor) == 0) {
    464                 return (const DexClassDef*)
    465                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
    466             }
    467         }
    468 
    469         idx = (idx + 1) & mask;
    470     }
    471 }
    472 
    473 
    474 /*
    475  * Compute the DEX file checksum for a memory-mapped DEX file.
    476  */
    477 u4 dexComputeChecksum(const DexHeader* pHeader)
    478 {
    479     const u1* start = (const u1*) pHeader;
    480 
    481     uLong adler = adler32(0L, Z_NULL, 0);
    482     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
    483 
    484     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
    485 }
    486 
    487 /*
    488  * Compute the size, in bytes, of a DexCode.
    489  */
    490 size_t dexGetDexCodeSize(const DexCode* pCode)
    491 {
    492     /*
    493      * The catch handler data is the last entry.  It has a variable number
    494      * of variable-size pieces, so we need to create an iterator.
    495      */
    496     u4 handlersSize;
    497     u4 offset;
    498     u4 ui;
    499 
    500     if (pCode->triesSize != 0) {
    501         handlersSize = dexGetHandlersSize(pCode);
    502         offset = dexGetFirstHandlerOffset(pCode);
    503     } else {
    504         handlersSize = 0;
    505         offset = 0;
    506     }
    507 
    508     for (ui = 0; ui < handlersSize; ui++) {
    509         DexCatchIterator iterator;
    510         dexCatchIteratorInit(&iterator, pCode, offset);
    511         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
    512     }
    513 
    514     const u1* handlerData = dexGetCatchHandlerData(pCode);
    515 
    516     //ALOGD("+++ pCode=%p handlerData=%p last offset=%d",
    517     //    pCode, handlerData, offset);
    518 
    519     /* return the size of the catch handler + everything before it */
    520     return (handlerData - (u1*) pCode) + offset;
    521 }
    522 
    523 /*
    524  * Round up to the next highest power of 2.
    525  *
    526  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    527  */
    528 u4 dexRoundUpPower2(u4 val)
    529 {
    530     val--;
    531     val |= val >> 1;
    532     val |= val >> 2;
    533     val |= val >> 4;
    534     val |= val >> 8;
    535     val |= val >> 16;
    536     val++;
    537 
    538     return val;
    539 }
    540