Home | History | Annotate | Download | only in libdex
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Access the contents of a .dex file.
     19  */
     20 
     21 #include "DexFile.h"
     22 #include "DexOptData.h"
     23 #include "DexProto.h"
     24 #include "DexCatch.h"
     25 #include "Leb128.h"
     26 #include "sha1.h"
     27 #include "ZipArchive.h"
     28 
     29 #include <zlib.h>
     30 
     31 #include <stdlib.h>
     32 #include <stddef.h>
     33 #include <string.h>
     34 #include <fcntl.h>
     35 #include <errno.h>
     36 
     37 
     38 /*
     39  * Verifying checksums is good, but it slows things down and causes us to
     40  * touch every page.  In the "optimized" world, it doesn't work at all,
     41  * because we rewrite the contents.
     42  */
     43 static const bool kVerifyChecksum = false;
     44 static const bool kVerifySignature = false;
     45 
     46 /* (documented in header) */
     47 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
     48     const char* string = dexGetPrimitiveTypeDescriptor(type);
     49 
     50     return (string == NULL) ? '\0' : string[0];
     51 }
     52 
     53 /* (documented in header) */
     54 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
     55     switch (type) {
     56         case PRIM_VOID:    return "V";
     57         case PRIM_BOOLEAN: return "Z";
     58         case PRIM_BYTE:    return "B";
     59         case PRIM_SHORT:   return "S";
     60         case PRIM_CHAR:    return "C";
     61         case PRIM_INT:     return "I";
     62         case PRIM_LONG:    return "J";
     63         case PRIM_FLOAT:   return "F";
     64         case PRIM_DOUBLE:  return "D";
     65         default:           return NULL;
     66     }
     67 
     68     return NULL;
     69 }
     70 
     71 /* (documented in header) */
     72 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
     73     switch (type) {
     74         case PRIM_VOID:    return NULL;
     75         case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
     76         case PRIM_BYTE:    return "Ljava/lang/Byte;";
     77         case PRIM_SHORT:   return "Ljava/lang/Short;";
     78         case PRIM_CHAR:    return "Ljava/lang/Character;";
     79         case PRIM_INT:     return "Ljava/lang/Integer;";
     80         case PRIM_LONG:    return "Ljava/lang/Long;";
     81         case PRIM_FLOAT:   return "Ljava/lang/Float;";
     82         case PRIM_DOUBLE:  return "Ljava/lang/Double;";
     83         default:           return NULL;
     84     }
     85 }
     86 
     87 /* (documented in header) */
     88 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
     89     switch (descriptorChar) {
     90         case 'V': return PRIM_VOID;
     91         case 'Z': return PRIM_BOOLEAN;
     92         case 'B': return PRIM_BYTE;
     93         case 'S': return PRIM_SHORT;
     94         case 'C': return PRIM_CHAR;
     95         case 'I': return PRIM_INT;
     96         case 'J': return PRIM_LONG;
     97         case 'F': return PRIM_FLOAT;
     98         case 'D': return PRIM_DOUBLE;
     99         default:  return PRIM_NOT;
    100     }
    101 }
    102 
    103 /* Return the UTF-8 encoded string with the specified string_id index,
    104  * also filling in the UTF-16 size (number of 16-bit code points).*/
    105 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
    106         u4* utf16Size) {
    107     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
    108     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
    109 
    110     *utf16Size = readUnsignedLeb128(&ptr);
    111     return (const char*) ptr;
    112 }
    113 
    114 /*
    115  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
    116  * least kSHA1DigestOutputLen bytes.
    117  */
    118 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
    119 
    120 /*
    121  * Compute a SHA-1 digest on a range of bytes.
    122  */
    123 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
    124     unsigned char digest[])
    125 {
    126     SHA1_CTX context;
    127     SHA1Init(&context);
    128     SHA1Update(&context, data, length);
    129     SHA1Final(digest, &context);
    130 }
    131 
    132 /*
    133  * Format the SHA-1 digest into the buffer, which must be able to hold at
    134  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
    135  */
    136 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
    137 {
    138     static const char hexDigit[] = "0123456789abcdef";
    139     char* cp;
    140     int i;
    141 
    142     cp = tmpBuf;
    143     for (i = 0; i < kSHA1DigestLen; i++) {
    144         *cp++ = hexDigit[digest[i] >> 4];
    145         *cp++ = hexDigit[digest[i] & 0x0f];
    146     }
    147     *cp++ = '\0';
    148 
    149     assert(cp == tmpBuf + kSHA1DigestOutputLen);
    150 
    151     return tmpBuf;
    152 }
    153 
    154 /*
    155  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
    156  *
    157  * This may or may not be compatible with UTF-8 hash functions used inside
    158  * the Dalvik VM.
    159  *
    160  * The basic "multiply by 31 and add" approach does better on class names
    161  * than most other things tried (e.g. adler32).
    162  */
    163 static u4 classDescriptorHash(const char* str)
    164 {
    165     u4 hash = 1;
    166 
    167     while (*str != '\0')
    168         hash = hash * 31 + *str++;
    169 
    170     return hash;
    171 }
    172 
    173 /*
    174  * Add an entry to the class lookup table.  We hash the string and probe
    175  * until we find an open slot.
    176  */
    177 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
    178     int stringOff, int classDefOff, int* pNumProbes)
    179 {
    180     const char* classDescriptor =
    181         (const char*) (pDexFile->baseAddr + stringOff);
    182     const DexClassDef* pClassDef =
    183         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
    184     u4 hash = classDescriptorHash(classDescriptor);
    185     int mask = pLookup->numEntries-1;
    186     int idx = hash & mask;
    187 
    188     /*
    189      * Find the first empty slot.  We oversized the table, so this is
    190      * guaranteed to finish.
    191      */
    192     int probes = 0;
    193     while (pLookup->table[idx].classDescriptorOffset != 0) {
    194         idx = (idx + 1) & mask;
    195         probes++;
    196     }
    197     //if (probes > 1)
    198     //    LOGW("classLookupAdd: probes=%d", probes);
    199 
    200     pLookup->table[idx].classDescriptorHash = hash;
    201     pLookup->table[idx].classDescriptorOffset = stringOff;
    202     pLookup->table[idx].classDefOffset = classDefOff;
    203     *pNumProbes = probes;
    204 }
    205 
    206 /*
    207  * Create the class lookup hash table.
    208  *
    209  * Returns newly-allocated storage.
    210  */
    211 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
    212 {
    213     DexClassLookup* pLookup;
    214     int allocSize;
    215     int i, numEntries;
    216     int numProbes, totalProbes, maxProbes;
    217 
    218     numProbes = totalProbes = maxProbes = 0;
    219 
    220     assert(pDexFile != NULL);
    221 
    222     /*
    223      * Using a factor of 3 results in far less probing than a factor of 2,
    224      * but almost doubles the flash storage requirements for the bootstrap
    225      * DEX files.  The overall impact on class loading performance seems
    226      * to be minor.  We could probably get some performance improvement by
    227      * using a secondary hash.
    228      */
    229     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    230     allocSize = offsetof(DexClassLookup, table)
    231                     + numEntries * sizeof(pLookup->table[0]);
    232 
    233     pLookup = (DexClassLookup*) calloc(1, allocSize);
    234     if (pLookup == NULL)
    235         return NULL;
    236     pLookup->size = allocSize;
    237     pLookup->numEntries = numEntries;
    238 
    239     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
    240         const DexClassDef* pClassDef;
    241         const char* pString;
    242 
    243         pClassDef = dexGetClassDef(pDexFile, i);
    244         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
    245 
    246         classLookupAdd(pDexFile, pLookup,
    247             (u1*)pString - pDexFile->baseAddr,
    248             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
    249 
    250         if (numProbes > maxProbes)
    251             maxProbes = numProbes;
    252         totalProbes += numProbes;
    253     }
    254 
    255     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
    256          " total=%d max=%d",
    257         pDexFile->pHeader->classDefsSize, numEntries,
    258         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
    259         allocSize, totalProbes, maxProbes);
    260 
    261     return pLookup;
    262 }
    263 
    264 
    265 /*
    266  * Set up the basic raw data pointers of a DexFile. This function isn't
    267  * meant for general use.
    268  */
    269 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    270     DexHeader *pHeader = (DexHeader*) data;
    271 
    272     pDexFile->baseAddr = data;
    273     pDexFile->pHeader = pHeader;
    274     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
    275     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
    276     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
    277     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
    278     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
    279     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
    280     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
    281 }
    282 
    283 /*
    284  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
    285  * called after the byte-ordering and structure alignment has been fixed up.
    286  *
    287  * On success, return a newly-allocated DexFile.
    288  */
    289 DexFile* dexFileParse(const u1* data, size_t length, int flags)
    290 {
    291     DexFile* pDexFile = NULL;
    292     const DexHeader* pHeader;
    293     const u1* magic;
    294     int result = -1;
    295 
    296     if (length < sizeof(DexHeader)) {
    297         LOGE("too short to be a valid .dex");
    298         goto bail;      /* bad file format */
    299     }
    300 
    301     pDexFile = (DexFile*) malloc(sizeof(DexFile));
    302     if (pDexFile == NULL)
    303         goto bail;      /* alloc failure */
    304     memset(pDexFile, 0, sizeof(DexFile));
    305 
    306     /*
    307      * Peel off the optimized header.
    308      */
    309     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
    310         magic = data;
    311         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
    312             LOGE("bad opt version (0x%02x %02x %02x %02x)",
    313                  magic[4], magic[5], magic[6], magic[7]);
    314             goto bail;
    315         }
    316 
    317         pDexFile->pOptHeader = (const DexOptHeader*) data;
    318         LOGV("Good opt header, DEX offset is %d, flags=0x%02x",
    319             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
    320 
    321         /* parse the optimized dex file tables */
    322         if (!dexParseOptData(data, length, pDexFile))
    323             goto bail;
    324 
    325         /* ignore the opt header and appended data from here on out */
    326         data += pDexFile->pOptHeader->dexOffset;
    327         length -= pDexFile->pOptHeader->dexOffset;
    328         if (pDexFile->pOptHeader->dexLength > length) {
    329             LOGE("File truncated? stored len=%d, rem len=%d",
    330                 pDexFile->pOptHeader->dexLength, (int) length);
    331             goto bail;
    332         }
    333         length = pDexFile->pOptHeader->dexLength;
    334     }
    335 
    336     dexFileSetupBasicPointers(pDexFile, data);
    337     pHeader = pDexFile->pHeader;
    338 
    339     if (!dexHasValidMagic(pHeader)) {
    340         goto bail;
    341     }
    342 
    343     /*
    344      * Verify the checksum(s).  This is reasonably quick, but does require
    345      * touching every byte in the DEX file.  The base checksum changes after
    346      * byte-swapping and DEX optimization.
    347      */
    348     if (flags & kDexParseVerifyChecksum) {
    349         u4 adler = dexComputeChecksum(pHeader);
    350         if (adler != pHeader->checksum) {
    351             LOGE("ERROR: bad checksum (%08x vs %08x)",
    352                 adler, pHeader->checksum);
    353             if (!(flags & kDexParseContinueOnError))
    354                 goto bail;
    355         } else {
    356             LOGV("+++ adler32 checksum (%08x) verified", adler);
    357         }
    358 
    359         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
    360         if (pOptHeader != NULL) {
    361             adler = dexComputeOptChecksum(pOptHeader);
    362             if (adler != pOptHeader->checksum) {
    363                 LOGE("ERROR: bad opt checksum (%08x vs %08x)",
    364                     adler, pOptHeader->checksum);
    365                 if (!(flags & kDexParseContinueOnError))
    366                     goto bail;
    367             } else {
    368                 LOGV("+++ adler32 opt checksum (%08x) verified", adler);
    369             }
    370         }
    371     }
    372 
    373     /*
    374      * Verify the SHA-1 digest.  (Normally we don't want to do this --
    375      * the digest is used to uniquely identify the original DEX file, and
    376      * can't be computed for verification after the DEX is byte-swapped
    377      * and optimized.)
    378      */
    379     if (kVerifySignature) {
    380         unsigned char sha1Digest[kSHA1DigestLen];
    381         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
    382                             kSHA1DigestLen;
    383 
    384         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
    385         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
    386             char tmpBuf1[kSHA1DigestOutputLen];
    387             char tmpBuf2[kSHA1DigestOutputLen];
    388             LOGE("ERROR: bad SHA1 digest (%s vs %s)",
    389                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
    390                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
    391             if (!(flags & kDexParseContinueOnError))
    392                 goto bail;
    393         } else {
    394             LOGV("+++ sha1 digest verified");
    395         }
    396     }
    397 
    398     if (pHeader->fileSize != length) {
    399         LOGE("ERROR: stored file size (%d) != expected (%d)",
    400             (int) pHeader->fileSize, (int) length);
    401         if (!(flags & kDexParseContinueOnError))
    402             goto bail;
    403     }
    404 
    405     if (pHeader->classDefsSize == 0) {
    406         LOGE("ERROR: DEX file has no classes in it, failing");
    407         goto bail;
    408     }
    409 
    410     /*
    411      * Success!
    412      */
    413     result = 0;
    414 
    415 bail:
    416     if (result != 0 && pDexFile != NULL) {
    417         dexFileFree(pDexFile);
    418         pDexFile = NULL;
    419     }
    420     return pDexFile;
    421 }
    422 
    423 /*
    424  * Free up the DexFile and any associated data structures.
    425  *
    426  * Note we may be called with a partially-initialized DexFile.
    427  */
    428 void dexFileFree(DexFile* pDexFile)
    429 {
    430     if (pDexFile == NULL)
    431         return;
    432 
    433     free(pDexFile);
    434 }
    435 
    436 /*
    437  * Look up a class definition entry by descriptor.
    438  *
    439  * "descriptor" should look like "Landroid/debug/Stuff;".
    440  */
    441 const DexClassDef* dexFindClass(const DexFile* pDexFile,
    442     const char* descriptor)
    443 {
    444     const DexClassLookup* pLookup = pDexFile->pClassLookup;
    445     u4 hash;
    446     int idx, mask;
    447 
    448     hash = classDescriptorHash(descriptor);
    449     mask = pLookup->numEntries - 1;
    450     idx = hash & mask;
    451 
    452     /*
    453      * Search until we find a matching entry or an empty slot.
    454      */
    455     while (true) {
    456         int offset;
    457 
    458         offset = pLookup->table[idx].classDescriptorOffset;
    459         if (offset == 0)
    460             return NULL;
    461 
    462         if (pLookup->table[idx].classDescriptorHash == hash) {
    463             const char* str;
    464 
    465             str = (const char*) (pDexFile->baseAddr + offset);
    466             if (strcmp(str, descriptor) == 0) {
    467                 return (const DexClassDef*)
    468                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
    469             }
    470         }
    471 
    472         idx = (idx + 1) & mask;
    473     }
    474 }
    475 
    476 
    477 /*
    478  * Compute the DEX file checksum for a memory-mapped DEX file.
    479  */
    480 u4 dexComputeChecksum(const DexHeader* pHeader)
    481 {
    482     const u1* start = (const u1*) pHeader;
    483 
    484     uLong adler = adler32(0L, Z_NULL, 0);
    485     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
    486 
    487     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
    488 }
    489 
    490 /*
    491  * Compute the size, in bytes, of a DexCode.
    492  */
    493 size_t dexGetDexCodeSize(const DexCode* pCode)
    494 {
    495     /*
    496      * The catch handler data is the last entry.  It has a variable number
    497      * of variable-size pieces, so we need to create an iterator.
    498      */
    499     u4 handlersSize;
    500     u4 offset;
    501     u4 ui;
    502 
    503     if (pCode->triesSize != 0) {
    504         handlersSize = dexGetHandlersSize(pCode);
    505         offset = dexGetFirstHandlerOffset(pCode);
    506     } else {
    507         handlersSize = 0;
    508         offset = 0;
    509     }
    510 
    511     for (ui = 0; ui < handlersSize; ui++) {
    512         DexCatchIterator iterator;
    513         dexCatchIteratorInit(&iterator, pCode, offset);
    514         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
    515     }
    516 
    517     const u1* handlerData = dexGetCatchHandlerData(pCode);
    518 
    519     //LOGD("+++ pCode=%p handlerData=%p last offset=%d",
    520     //    pCode, handlerData, offset);
    521 
    522     /* return the size of the catch handler + everything before it */
    523     return (handlerData - (u1*) pCode) + offset;
    524 }
    525 
    526 /*
    527  * Round up to the next highest power of 2.
    528  *
    529  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    530  */
    531 u4 dexRoundUpPower2(u4 val)
    532 {
    533     val--;
    534     val |= val >> 1;
    535     val |= val >> 2;
    536     val |= val >> 4;
    537     val |= val >> 8;
    538     val |= val >> 16;
    539     val++;
    540 
    541     return val;
    542 }
    543