Home | History | Annotate | Download | only in libdex
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Access the contents of a .dex file.
     19  */
     20 
     21 #include "DexFile.h"
     22 #include "DexProto.h"
     23 #include "DexCatch.h"
     24 #include "Leb128.h"
     25 #include "sha1.h"
     26 #include "ZipArchive.h"
     27 
     28 #include <zlib.h>
     29 
     30 #include <stdlib.h>
     31 #include <stddef.h>
     32 #include <string.h>
     33 #include <fcntl.h>
     34 #include <errno.h>
     35 
     36 // fwd
     37 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader);
     38 
     39 
     40 /*
     41  * Verifying checksums is good, but it slows things down and causes us to
     42  * touch every page.  In the "optimized" world, it doesn't work at all,
     43  * because we rewrite the contents.
     44  */
     45 static const bool kVerifyChecksum = false;
     46 static const bool kVerifySignature = false;
     47 
     48 
     49 /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
     50  * code point values for comparison. This treats different encodings
     51  * for the same code point as equivalent, except that only a real '\0'
     52  * byte is considered the string terminator. The return value is as
     53  * for strcmp(). */
     54 int dexUtf8Cmp(const char* s1, const char* s2) {
     55     for (;;) {
     56         if (*s1 == '\0') {
     57             if (*s2 == '\0') {
     58                 return 0;
     59             }
     60             return -1;
     61         } else if (*s2 == '\0') {
     62             return 1;
     63         }
     64 
     65         int utf1 = dexGetUtf16FromUtf8(&s1);
     66         int utf2 = dexGetUtf16FromUtf8(&s2);
     67         int diff = utf1 - utf2;
     68 
     69         if (diff != 0) {
     70             return diff;
     71         }
     72     }
     73 }
     74 
     75 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
     76 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
     77     0x00000000, // 00..1f low control characters; nothing valid
     78     0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
     79     0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
     80     0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
     81 };
     82 
     83 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
     84 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
     85     /*
     86      * It's a multibyte encoded character. Decode it and analyze. We
     87      * accept anything that isn't (a) an improperly encoded low value,
     88      * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
     89      * control character, or (e) a high space, layout, or special
     90      * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
     91      * U+fff0..U+ffff).
     92      */
     93 
     94     u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
     95 
     96     // Perform follow-up tests based on the high 8 bits.
     97     switch (utf16 >> 8) {
     98         case 0x00: {
     99             // It's only valid if it's above the ISO-8859-1 high space (0xa0).
    100             return (utf16 > 0x00a0);
    101         }
    102         case 0xd8:
    103         case 0xd9:
    104         case 0xda:
    105         case 0xdb: {
    106             /*
    107              * It's a leading surrogate. Check to see that a trailing
    108              * surrogate follows.
    109              */
    110             utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
    111             return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
    112         }
    113         case 0xdc:
    114         case 0xdd:
    115         case 0xde:
    116         case 0xdf: {
    117             // It's a trailing surrogate, which is not valid at this point.
    118             return false;
    119         }
    120         case 0x20:
    121         case 0xff: {
    122             // It's in the range that has spaces, controls, and specials.
    123             switch (utf16 & 0xfff8) {
    124                 case 0x2000:
    125                 case 0x2008:
    126                 case 0x2028:
    127                 case 0xfff0:
    128                 case 0xfff8: {
    129                     return false;
    130                 }
    131             }
    132             break;
    133         }
    134     }
    135 
    136     return true;
    137 }
    138 
    139 /* Return whether the given string is a valid field or method name. */
    140 bool dexIsValidMemberName(const char* s) {
    141     bool angleName = false;
    142 
    143     switch (*s) {
    144         case '\0': {
    145             // The empty string is not a valid name.
    146             return false;
    147         }
    148         case '<': {
    149             /*
    150              * '<' is allowed only at the start of a name, and if present,
    151              * means that the name must end with '>'.
    152              */
    153             angleName = true;
    154             s++;
    155             break;
    156         }
    157     }
    158 
    159     for (;;) {
    160         switch (*s) {
    161             case '\0': {
    162                 return !angleName;
    163             }
    164             case '>': {
    165                 return angleName && s[1] == '\0';
    166             }
    167         }
    168         if (!dexIsValidMemberNameUtf8(&s)) {
    169             return false;
    170         }
    171     }
    172 }
    173 
    174 /* Return whether the given string is a valid type descriptor. */
    175 bool dexIsValidTypeDescriptor(const char* s) {
    176     int arrayCount = 0;
    177 
    178     while (*s == '[') {
    179         arrayCount++;
    180         s++;
    181     }
    182 
    183     if (arrayCount > 255) {
    184         // Arrays may have no more than 255 dimensions.
    185         return false;
    186     }
    187 
    188     switch (*(s++)) {
    189         case 'B':
    190         case 'C':
    191         case 'D':
    192         case 'F':
    193         case 'I':
    194         case 'J':
    195         case 'S':
    196         case 'Z': {
    197             // These are all single-character descriptors for primitive types.
    198             return (*s == '\0');
    199         }
    200         case 'V': {
    201             // You can't have an array of void.
    202             return (arrayCount == 0) && (*s == '\0');
    203         }
    204         case 'L': {
    205             // Break out and continue below.
    206             break;
    207         }
    208         default: {
    209             // Oddball descriptor character.
    210             return false;
    211         }
    212     }
    213 
    214     // We just consumed the 'L' that introduces a class name.
    215 
    216     bool slashOrFirst = true; // first character or just encountered a slash
    217     for (;;) {
    218         u1 c = (u1) *s;
    219         switch (c) {
    220             case '\0': {
    221                 // Premature end.
    222                 return false;
    223             }
    224             case ';': {
    225                 /*
    226                  * Make sure that this is the end of the string and that
    227                  * it doesn't end with an empty component (including the
    228                  * degenerate case of "L;").
    229                  */
    230                 return (s[1] == '\0') && !slashOrFirst;
    231             }
    232             case '/': {
    233                 if (slashOrFirst) {
    234                     // Slash at start or two slashes in a row.
    235                     return false;
    236                 }
    237                 slashOrFirst = true;
    238                 s++;
    239                 break;
    240             }
    241             default: {
    242                 if (!dexIsValidMemberNameUtf8(&s)) {
    243                     return false;
    244                 }
    245                 slashOrFirst = false;
    246                 break;
    247             }
    248         }
    249     }
    250 }
    251 
    252 /* Return whether the given string is a valid reference descriptor. This
    253  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    254  * is for a class or array and not a primitive type. */
    255 bool dexIsReferenceDescriptor(const char* s) {
    256     if (!dexIsValidTypeDescriptor(s)) {
    257         return false;
    258     }
    259 
    260     return (s[0] == 'L') || (s[0] == '[');
    261 }
    262 
    263 /* Return whether the given string is a valid class descriptor. This
    264  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    265  * is for a class and not an array or primitive type. */
    266 bool dexIsClassDescriptor(const char* s) {
    267     if (!dexIsValidTypeDescriptor(s)) {
    268         return false;
    269     }
    270 
    271     return s[0] == 'L';
    272 }
    273 
    274 /* Return whether the given string is a valid field type descriptor. This
    275  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    276  * is for anything but "void". */
    277 bool dexIsFieldDescriptor(const char* s) {
    278     if (!dexIsValidTypeDescriptor(s)) {
    279         return false;
    280     }
    281 
    282     return s[0] != 'V';
    283 }
    284 
    285 /* Return the UTF-8 encoded string with the specified string_id index,
    286  * also filling in the UTF-16 size (number of 16-bit code points).*/
    287 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
    288         u4* utf16Size) {
    289     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
    290     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
    291 
    292     *utf16Size = readUnsignedLeb128(&ptr);
    293     return (const char*) ptr;
    294 }
    295 
    296 /*
    297  * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
    298  * least kSHA1DigestOutputLen bytes.
    299  */
    300 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
    301 
    302 /*
    303  * Compute a SHA-1 digest on a range of bytes.
    304  */
    305 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
    306     unsigned char digest[])
    307 {
    308     SHA1_CTX context;
    309     SHA1Init(&context);
    310     SHA1Update(&context, data, length);
    311     SHA1Final(digest, &context);
    312 }
    313 
    314 /*
    315  * Format the SHA-1 digest into the buffer, which must be able to hold at
    316  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
    317  */
    318 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
    319 {
    320     static const char hexDigit[] = "0123456789abcdef";
    321     char* cp;
    322     int i;
    323 
    324     cp = tmpBuf;
    325     for (i = 0; i < kSHA1DigestLen; i++) {
    326         *cp++ = hexDigit[digest[i] >> 4];
    327         *cp++ = hexDigit[digest[i] & 0x0f];
    328     }
    329     *cp++ = '\0';
    330 
    331     assert(cp == tmpBuf + kSHA1DigestOutputLen);
    332 
    333     return tmpBuf;
    334 }
    335 
    336 /*
    337  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
    338  *
    339  * This may or may not be compatible with UTF-8 hash functions used inside
    340  * the Dalvik VM.
    341  *
    342  * The basic "multiply by 31 and add" approach does better on class names
    343  * than most other things tried (e.g. adler32).
    344  */
    345 static u4 classDescriptorHash(const char* str)
    346 {
    347     u4 hash = 1;
    348 
    349     while (*str != '\0')
    350         hash = hash * 31 + *str++;
    351 
    352     return hash;
    353 }
    354 
    355 /*
    356  * Add an entry to the class lookup table.  We hash the string and probe
    357  * until we find an open slot.
    358  */
    359 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
    360     int stringOff, int classDefOff, int* pNumProbes)
    361 {
    362     const char* classDescriptor =
    363         (const char*) (pDexFile->baseAddr + stringOff);
    364     const DexClassDef* pClassDef =
    365         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
    366     u4 hash = classDescriptorHash(classDescriptor);
    367     int mask = pLookup->numEntries-1;
    368     int idx = hash & mask;
    369 
    370     /*
    371      * Find the first empty slot.  We oversized the table, so this is
    372      * guaranteed to finish.
    373      */
    374     int probes = 0;
    375     while (pLookup->table[idx].classDescriptorOffset != 0) {
    376         idx = (idx + 1) & mask;
    377         probes++;
    378     }
    379     //if (probes > 1)
    380     //    LOGW("classLookupAdd: probes=%d\n", probes);
    381 
    382     pLookup->table[idx].classDescriptorHash = hash;
    383     pLookup->table[idx].classDescriptorOffset = stringOff;
    384     pLookup->table[idx].classDefOffset = classDefOff;
    385     *pNumProbes = probes;
    386 }
    387 
    388 /*
    389  * Round up to the next highest power of 2.
    390  *
    391  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    392  */
    393 u4 dexRoundUpPower2(u4 val)
    394 {
    395     val--;
    396     val |= val >> 1;
    397     val |= val >> 2;
    398     val |= val >> 4;
    399     val |= val >> 8;
    400     val |= val >> 16;
    401     val++;
    402 
    403     return val;
    404 }
    405 
    406 /*
    407  * Create the class lookup hash table.
    408  *
    409  * Returns newly-allocated storage.
    410  */
    411 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
    412 {
    413     DexClassLookup* pLookup;
    414     int allocSize;
    415     int i, numEntries;
    416     int numProbes, totalProbes, maxProbes;
    417 
    418     numProbes = totalProbes = maxProbes = 0;
    419 
    420     assert(pDexFile != NULL);
    421 
    422     /*
    423      * Using a factor of 3 results in far less probing than a factor of 2,
    424      * but almost doubles the flash storage requirements for the bootstrap
    425      * DEX files.  The overall impact on class loading performance seems
    426      * to be minor.  We could probably get some performance improvement by
    427      * using a secondary hash.
    428      */
    429     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    430     allocSize = offsetof(DexClassLookup, table)
    431                     + numEntries * sizeof(pLookup->table[0]);
    432 
    433     pLookup = (DexClassLookup*) calloc(1, allocSize);
    434     if (pLookup == NULL)
    435         return NULL;
    436     pLookup->size = allocSize;
    437     pLookup->numEntries = numEntries;
    438 
    439     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
    440         const DexClassDef* pClassDef;
    441         const char* pString;
    442 
    443         pClassDef = dexGetClassDef(pDexFile, i);
    444         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
    445 
    446         classLookupAdd(pDexFile, pLookup,
    447             (u1*)pString - pDexFile->baseAddr,
    448             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
    449 
    450         if (numProbes > maxProbes)
    451             maxProbes = numProbes;
    452         totalProbes += numProbes;
    453     }
    454 
    455     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
    456          " total=%d max=%d\n",
    457         pDexFile->pHeader->classDefsSize, numEntries,
    458         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
    459         allocSize, totalProbes, maxProbes);
    460 
    461     return pLookup;
    462 }
    463 
    464 
    465 /*
    466  * Set up the basic raw data pointers of a DexFile. This function isn't
    467  * meant for general use.
    468  */
    469 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    470     DexHeader *pHeader = (DexHeader*) data;
    471 
    472     pDexFile->baseAddr = data;
    473     pDexFile->pHeader = pHeader;
    474     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
    475     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
    476     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
    477     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
    478     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
    479     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
    480     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
    481 }
    482 
    483 
    484 /*
    485  * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
    486  */
    487 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
    488     u4* pFullCount, u4* pReducedCount, const u2** pMap)
    489 {
    490     const u4* wordPtr = (const u4*) *pData;
    491     u4 size = *pSize;
    492     u4 mapCount;
    493 
    494     if (expanding) {
    495         if (size < 4)
    496             return false;
    497         mapCount = *pReducedCount = *wordPtr++;
    498         *pFullCount = (u4) -1;
    499         size -= sizeof(u4);
    500     } else {
    501         if (size < 8)
    502             return false;
    503         mapCount = *pFullCount = *wordPtr++;
    504         *pReducedCount = *wordPtr++;
    505         size -= sizeof(u4) * 2;
    506     }
    507 
    508     u4 mapSize = mapCount * sizeof(u2);
    509 
    510     if (size < mapSize)
    511         return false;
    512     *pMap = (const u2*) wordPtr;
    513     size -= mapSize;
    514 
    515     /* advance the pointer */
    516     const u1* ptr = (const u1*) wordPtr;
    517     ptr += (mapSize + 3) & ~0x3;
    518 
    519     /* update pass-by-reference values */
    520     *pData = (const u1*) ptr;
    521     *pSize = size;
    522 
    523     return true;
    524 }
    525 
    526 /*
    527  * Set up some pointers into the mapped data.
    528  *
    529  * See analysis/ReduceConstants.c for the data layout description.
    530  */
    531 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
    532     bool expanding)
    533 {
    534     if (!parseIndexMapEntry(&data, &size, expanding,
    535             &pDexFile->indexMap.classFullCount,
    536             &pDexFile->indexMap.classReducedCount,
    537             &pDexFile->indexMap.classMap))
    538     {
    539         return false;
    540     }
    541 
    542     if (!parseIndexMapEntry(&data, &size, expanding,
    543             &pDexFile->indexMap.methodFullCount,
    544             &pDexFile->indexMap.methodReducedCount,
    545             &pDexFile->indexMap.methodMap))
    546     {
    547         return false;
    548     }
    549 
    550     if (!parseIndexMapEntry(&data, &size, expanding,
    551             &pDexFile->indexMap.fieldFullCount,
    552             &pDexFile->indexMap.fieldReducedCount,
    553             &pDexFile->indexMap.fieldMap))
    554     {
    555         return false;
    556     }
    557 
    558     if (!parseIndexMapEntry(&data, &size, expanding,
    559             &pDexFile->indexMap.stringFullCount,
    560             &pDexFile->indexMap.stringReducedCount,
    561             &pDexFile->indexMap.stringMap))
    562     {
    563         return false;
    564     }
    565 
    566     if (expanding) {
    567         /*
    568          * The map includes the "reduced" counts; pull the original counts
    569          * out of the DexFile so that code has a consistent source.
    570          */
    571         assert(pDexFile->indexMap.classFullCount == (u4) -1);
    572         assert(pDexFile->indexMap.methodFullCount == (u4) -1);
    573         assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
    574         assert(pDexFile->indexMap.stringFullCount == (u4) -1);
    575 
    576 #if 0   // TODO: not available yet -- do later or just skip this
    577         pDexFile->indexMap.classFullCount =
    578             pDexFile->pHeader->typeIdsSize;
    579         pDexFile->indexMap.methodFullCount =
    580             pDexFile->pHeader->methodIdsSize;
    581         pDexFile->indexMap.fieldFullCount =
    582             pDexFile->pHeader->fieldIdsSize;
    583         pDexFile->indexMap.stringFullCount =
    584             pDexFile->pHeader->stringIdsSize;
    585 #endif
    586     }
    587 
    588     LOGI("Class : %u %u %u\n",
    589         pDexFile->indexMap.classFullCount,
    590         pDexFile->indexMap.classReducedCount,
    591         pDexFile->indexMap.classMap[0]);
    592     LOGI("Method: %u %u %u\n",
    593         pDexFile->indexMap.methodFullCount,
    594         pDexFile->indexMap.methodReducedCount,
    595         pDexFile->indexMap.methodMap[0]);
    596     LOGI("Field : %u %u %u\n",
    597         pDexFile->indexMap.fieldFullCount,
    598         pDexFile->indexMap.fieldReducedCount,
    599         pDexFile->indexMap.fieldMap[0]);
    600     LOGI("String: %u %u %u\n",
    601         pDexFile->indexMap.stringFullCount,
    602         pDexFile->indexMap.stringReducedCount,
    603         pDexFile->indexMap.stringMap[0]);
    604 
    605     return true;
    606 }
    607 
    608 /*
    609  * Parse some auxillary data tables.
    610  *
    611  * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
    612  * table.  Subsequent versions switched to the "chunk" format.
    613  */
    614 static bool parseAuxData(const u1* data, DexFile* pDexFile)
    615 {
    616     const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
    617     u4 indexMapType = 0;
    618 
    619     /* v1.0 format? */
    620     if (*pAux == 0) {
    621         LOGV("+++ found OLD dex format\n");
    622         pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
    623         return true;
    624     }
    625     LOGV("+++ found NEW dex format\n");
    626 
    627     /* process chunks until we see the end marker */
    628     while (*pAux != kDexChunkEnd) {
    629         u4 size = *(pAux+1);
    630         u1* data = (u1*) (pAux + 2);
    631 
    632         switch (*pAux) {
    633         case kDexChunkClassLookup:
    634             pDexFile->pClassLookup = (const DexClassLookup*) data;
    635             break;
    636         case kDexChunkReducingIndexMap:
    637             LOGI("+++ found reducing index map, size=%u\n", size);
    638             if (!parseIndexMap(pDexFile, data, size, false)) {
    639                 LOGE("Failed parsing reducing index map\n");
    640                 return false;
    641             }
    642             indexMapType = *pAux;
    643             break;
    644         case kDexChunkExpandingIndexMap:
    645             LOGI("+++ found expanding index map, size=%u\n", size);
    646             if (!parseIndexMap(pDexFile, data, size, true)) {
    647                 LOGE("Failed parsing expanding index map\n");
    648                 return false;
    649             }
    650             indexMapType = *pAux;
    651             break;
    652         case kDexChunkRegisterMaps:
    653             LOGV("+++ found register maps, size=%u\n", size);
    654             pDexFile->pRegisterMapPool = data;
    655             break;
    656         default:
    657             LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
    658                 *pAux,
    659                 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
    660                 (char) ((*pAux) >> 8),  (char)  (*pAux),
    661                 size);
    662             break;
    663         }
    664 
    665         /*
    666          * Advance pointer, padding to 64-bit boundary.  The extra "+8" is
    667          * for the type/size header.
    668          */
    669         size = (size + 8 + 7) & ~7;
    670         pAux += size / sizeof(u4);
    671     }
    672 
    673 #if 0   // TODO: propagate expected map type from the VM through the API
    674     /*
    675      * If we're configured to expect an index map, and we don't find one,
    676      * reject this DEX so we'll regenerate it.  Also, if we found an
    677      * "expanding" map but we're not configured to use it, we have to fail
    678      * because the constants aren't usable without translation.
    679      */
    680     if (indexMapType != expectedIndexMapType) {
    681         LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
    682             indexMapType, DVM_REDUCE_CONSTANTS);
    683         return false;
    684     }
    685 #endif
    686 
    687     return true;
    688 }
    689 
    690 /*
    691  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
    692  * called after the byte-ordering and structure alignment has been fixed up.
    693  *
    694  * On success, return a newly-allocated DexFile.
    695  */
    696 DexFile* dexFileParse(const u1* data, size_t length, int flags)
    697 {
    698     DexFile* pDexFile = NULL;
    699     const DexHeader* pHeader;
    700     const u1* magic;
    701     int result = -1;
    702 
    703     if (length < sizeof(DexHeader)) {
    704         LOGE("too short to be a valid .dex\n");
    705         goto bail;      /* bad file format */
    706     }
    707 
    708     pDexFile = (DexFile*) malloc(sizeof(DexFile));
    709     if (pDexFile == NULL)
    710         goto bail;      /* alloc failure */
    711     memset(pDexFile, 0, sizeof(DexFile));
    712 
    713     /*
    714      * Peel off the optimized header.
    715      */
    716     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
    717         magic = data;
    718         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
    719             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
    720                  magic[4], magic[5], magic[6], magic[7]);
    721             goto bail;
    722         }
    723 
    724         pDexFile->pOptHeader = (const DexOptHeader*) data;
    725         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
    726             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
    727 
    728         /* locate some auxillary data tables */
    729         if (!parseAuxData(data, pDexFile))
    730             goto bail;
    731 
    732         /* ignore the opt header and appended data from here on out */
    733         data += pDexFile->pOptHeader->dexOffset;
    734         length -= pDexFile->pOptHeader->dexOffset;
    735         if (pDexFile->pOptHeader->dexLength > length) {
    736             LOGE("File truncated? stored len=%d, rem len=%d\n",
    737                 pDexFile->pOptHeader->dexLength, (int) length);
    738             goto bail;
    739         }
    740         length = pDexFile->pOptHeader->dexLength;
    741     }
    742 
    743     dexFileSetupBasicPointers(pDexFile, data);
    744     pHeader = pDexFile->pHeader;
    745 
    746     magic = pHeader->magic;
    747     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
    748         /* not expected */
    749         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
    750              magic[0], magic[1], magic[2], magic[3]);
    751         goto bail;
    752     }
    753     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
    754         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
    755              magic[4], magic[5], magic[6], magic[7]);
    756         goto bail;
    757     }
    758 
    759     /*
    760      * Verify the checksum(s).  This is reasonably quick, but does require
    761      * touching every byte in the DEX file.  The base checksum changes after
    762      * byte-swapping and DEX optimization.
    763      */
    764     if (flags & kDexParseVerifyChecksum) {
    765         u4 adler = dexComputeChecksum(pHeader);
    766         if (adler != pHeader->checksum) {
    767             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
    768                 adler, pHeader->checksum);
    769             if (!(flags & kDexParseContinueOnError))
    770                 goto bail;
    771         } else {
    772             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
    773         }
    774 
    775         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
    776         if (pOptHeader != NULL) {
    777             adler = dexComputeOptChecksum(pOptHeader);
    778             if (adler != pOptHeader->checksum) {
    779                 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
    780                     adler, pOptHeader->checksum);
    781                 if (!(flags & kDexParseContinueOnError))
    782                     goto bail;
    783             } else {
    784                 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
    785             }
    786         }
    787     }
    788 
    789     /*
    790      * Verify the SHA-1 digest.  (Normally we don't want to do this --
    791      * the digest is used to uniquely identify the original DEX file, and
    792      * can't be computed for verification after the DEX is byte-swapped
    793      * and optimized.)
    794      */
    795     if (kVerifySignature) {
    796         unsigned char sha1Digest[kSHA1DigestLen];
    797         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
    798                             kSHA1DigestLen;
    799 
    800         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
    801         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
    802             char tmpBuf1[kSHA1DigestOutputLen];
    803             char tmpBuf2[kSHA1DigestOutputLen];
    804             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
    805                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
    806                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
    807             if (!(flags & kDexParseContinueOnError))
    808                 goto bail;
    809         } else {
    810             LOGV("+++ sha1 digest verified\n");
    811         }
    812     }
    813 
    814     if (pHeader->fileSize != length) {
    815         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
    816             (int) pHeader->fileSize, (int) length);
    817         if (!(flags & kDexParseContinueOnError))
    818             goto bail;
    819     }
    820 
    821     if (pHeader->classDefsSize == 0) {
    822         LOGE("ERROR: DEX file has no classes in it, failing\n");
    823         goto bail;
    824     }
    825 
    826     /*
    827      * Success!
    828      */
    829     result = 0;
    830 
    831 bail:
    832     if (result != 0 && pDexFile != NULL) {
    833         dexFileFree(pDexFile);
    834         pDexFile = NULL;
    835     }
    836     return pDexFile;
    837 }
    838 
    839 /*
    840  * Free up the DexFile and any associated data structures.
    841  *
    842  * Note we may be called with a partially-initialized DexFile.
    843  */
    844 void dexFileFree(DexFile* pDexFile)
    845 {
    846     if (pDexFile == NULL)
    847         return;
    848 
    849     free(pDexFile);
    850 }
    851 
    852 /*
    853  * Look up a class definition entry by descriptor.
    854  *
    855  * "descriptor" should look like "Landroid/debug/Stuff;".
    856  */
    857 const DexClassDef* dexFindClass(const DexFile* pDexFile,
    858     const char* descriptor)
    859 {
    860     const DexClassLookup* pLookup = pDexFile->pClassLookup;
    861     u4 hash;
    862     int idx, mask;
    863 
    864     hash = classDescriptorHash(descriptor);
    865     mask = pLookup->numEntries - 1;
    866     idx = hash & mask;
    867 
    868     /*
    869      * Search until we find a matching entry or an empty slot.
    870      */
    871     while (true) {
    872         int offset;
    873 
    874         offset = pLookup->table[idx].classDescriptorOffset;
    875         if (offset == 0)
    876             return NULL;
    877 
    878         if (pLookup->table[idx].classDescriptorHash == hash) {
    879             const char* str;
    880 
    881             str = (const char*) (pDexFile->baseAddr + offset);
    882             if (strcmp(str, descriptor) == 0) {
    883                 return (const DexClassDef*)
    884                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
    885             }
    886         }
    887 
    888         idx = (idx + 1) & mask;
    889     }
    890 }
    891 
    892 
    893 /*
    894  * Compute the DEX file checksum for a memory-mapped DEX file.
    895  */
    896 u4 dexComputeChecksum(const DexHeader* pHeader)
    897 {
    898     const u1* start = (const u1*) pHeader;
    899 
    900     uLong adler = adler32(0L, Z_NULL, 0);
    901     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
    902 
    903     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
    904 }
    905 
    906 /*
    907  * Compute the checksum on the data appended to the DEX file by dexopt.
    908  */
    909 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader)
    910 {
    911     const u1* start = (const u1*) pOptHeader + pOptHeader->depsOffset;
    912     const u1* end = (const u1*) pOptHeader +
    913         pOptHeader->auxOffset + pOptHeader->auxLength;
    914 
    915     uLong adler = adler32(0L, Z_NULL, 0);
    916 
    917     return (u4) adler32(adler, start, end - start);
    918 }
    919 
    920 
    921 /*
    922  * Compute the size, in bytes, of a DexCode.
    923  */
    924 size_t dexGetDexCodeSize(const DexCode* pCode)
    925 {
    926     /*
    927      * The catch handler data is the last entry.  It has a variable number
    928      * of variable-size pieces, so we need to create an iterator.
    929      */
    930     u4 handlersSize;
    931     u4 offset;
    932     u4 ui;
    933 
    934     if (pCode->triesSize != 0) {
    935         handlersSize = dexGetHandlersSize(pCode);
    936         offset = dexGetFirstHandlerOffset(pCode);
    937     } else {
    938         handlersSize = 0;
    939         offset = 0;
    940     }
    941 
    942     for (ui = 0; ui < handlersSize; ui++) {
    943         DexCatchIterator iterator;
    944         dexCatchIteratorInit(&iterator, pCode, offset);
    945         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
    946     }
    947 
    948     const u1* handlerData = dexGetCatchHandlerData(pCode);
    949 
    950     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
    951     //    pCode, handlerData, offset);
    952 
    953     /* return the size of the catch handler + everything before it */
    954     return (handlerData - (u1*) pCode) + offset;
    955 }
    956 
    957 
    958 /*
    959  * ===========================================================================
    960  *      Debug info
    961  * ===========================================================================
    962  */
    963 
    964 /*
    965  * Decode the arguments in a method signature, which looks something
    966  * like "(ID[Ljava/lang/String;)V".
    967  *
    968  * Returns the type signature letter for the next argument, or ')' if
    969  * there are no more args.  Advances "pSig" to point to the character
    970  * after the one returned.
    971  */
    972 static char decodeSignature(const char** pSig)
    973 {
    974     const char* sig = *pSig;
    975 
    976     if (*sig == '(')
    977         sig++;
    978 
    979     if (*sig == 'L') {
    980         /* object ref */
    981         while (*++sig != ';')
    982             ;
    983         *pSig = sig+1;
    984         return 'L';
    985     }
    986     if (*sig == '[') {
    987         /* array; advance past array type */
    988         while (*++sig == '[')
    989             ;
    990         if (*sig == 'L') {
    991             while (*++sig != ';')
    992                 ;
    993         }
    994         *pSig = sig+1;
    995         return '[';
    996     }
    997     if (*sig == '\0')
    998         return *sig;        /* don't advance further */
    999 
   1000     *pSig = sig+1;
   1001     return *sig;
   1002 }
   1003 
   1004 /*
   1005  * returns the length of a type string, given the start of the
   1006  * type string. Used for the case where the debug info format
   1007  * references types that are inside a method type signature.
   1008  */
   1009 static int typeLength (const char *type) {
   1010     // Assumes any leading '(' has already been gobbled
   1011     const char *end = type;
   1012     decodeSignature(&end);
   1013     return end - type;
   1014 }
   1015 
   1016 /*
   1017  * Reads a string index as encoded for the debug info format,
   1018  * returning a string pointer or NULL as appropriate.
   1019  */
   1020 static const char* readStringIdx(const DexFile* pDexFile,
   1021         const u1** pStream) {
   1022     u4 stringIdx = readUnsignedLeb128(pStream);
   1023 
   1024     // Remember, encoded string indicies have 1 added to them.
   1025     if (stringIdx == 0) {
   1026         return NULL;
   1027     } else {
   1028         return dexStringById(pDexFile, stringIdx - 1);
   1029     }
   1030 }
   1031 
   1032 /*
   1033  * Reads a type index as encoded for the debug info format, returning
   1034  * a string pointer for its descriptor or NULL as appropriate.
   1035  */
   1036 static const char* readTypeIdx(const DexFile* pDexFile,
   1037         const u1** pStream) {
   1038     u4 typeIdx = readUnsignedLeb128(pStream);
   1039 
   1040     // Remember, encoded type indicies have 1 added to them.
   1041     if (typeIdx == 0) {
   1042         return NULL;
   1043     } else {
   1044         return dexStringByTypeIdx(pDexFile, typeIdx - 1);
   1045     }
   1046 }
   1047 
   1048 /* access_flag value indicating that a method is static */
   1049 #define ACC_STATIC              0x0008
   1050 
   1051 typedef struct LocalInfo {
   1052     const char *name;
   1053     const char *descriptor;
   1054     const char *signature;
   1055     u2 startAddress;
   1056     bool live;
   1057 } LocalInfo;
   1058 
   1059 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
   1060         LocalInfo *localInReg, DexDebugNewLocalCb localCb)
   1061 {
   1062     if (localCb != NULL && localInReg[reg].live) {
   1063         localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
   1064                 localInReg[reg].name,
   1065                 localInReg[reg].descriptor,
   1066                 localInReg[reg].signature == NULL
   1067                 ? "" : localInReg[reg].signature );
   1068     }
   1069 }
   1070 
   1071 // TODO optimize localCb == NULL case
   1072 void dexDecodeDebugInfo(
   1073             const DexFile* pDexFile,
   1074             const DexCode* pCode,
   1075             const char* classDescriptor,
   1076             u4 protoIdx,
   1077             u4 accessFlags,
   1078             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
   1079             void* cnxt)
   1080 {
   1081     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
   1082     u4 line;
   1083     u4 parametersSize;
   1084     u4 address = 0;
   1085     LocalInfo localInReg[pCode->registersSize];
   1086     u4 insnsSize = pCode->insnsSize;
   1087     DexProto proto = { pDexFile, protoIdx };
   1088 
   1089     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
   1090 
   1091     if (stream == NULL) {
   1092         goto end;
   1093     }
   1094 
   1095     line = readUnsignedLeb128(&stream);
   1096     parametersSize = readUnsignedLeb128(&stream);
   1097 
   1098     u2 argReg = pCode->registersSize - pCode->insSize;
   1099 
   1100     if ((accessFlags & ACC_STATIC) == 0) {
   1101         /*
   1102          * The code is an instance method, which means that there is
   1103          * an initial this parameter. Also, the proto list should
   1104          * contain exactly one fewer argument word than the insSize
   1105          * indicates.
   1106          */
   1107         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
   1108         localInReg[argReg].name = "this";
   1109         localInReg[argReg].descriptor = classDescriptor;
   1110         localInReg[argReg].startAddress = 0;
   1111         localInReg[argReg].live = true;
   1112         argReg++;
   1113     } else {
   1114         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
   1115     }
   1116 
   1117     DexParameterIterator iterator;
   1118     dexParameterIteratorInit(&iterator, &proto);
   1119 
   1120     while (parametersSize-- != 0) {
   1121         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
   1122         const char *name;
   1123         int reg;
   1124 
   1125         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
   1126             goto invalid_stream;
   1127         }
   1128 
   1129         name = readStringIdx(pDexFile, &stream);
   1130         reg = argReg;
   1131 
   1132         switch (descriptor[0]) {
   1133             case 'D':
   1134             case 'J':
   1135                 argReg += 2;
   1136                 break;
   1137             default:
   1138                 argReg += 1;
   1139                 break;
   1140         }
   1141 
   1142         if (name != NULL) {
   1143             localInReg[reg].name = name;
   1144             localInReg[reg].descriptor = descriptor;
   1145             localInReg[reg].signature = NULL;
   1146             localInReg[reg].startAddress = address;
   1147             localInReg[reg].live = true;
   1148         }
   1149     }
   1150 
   1151     for (;;)  {
   1152         u1 opcode = *stream++;
   1153         u2 reg;
   1154 
   1155         switch (opcode) {
   1156             case DBG_END_SEQUENCE:
   1157                 goto end;
   1158 
   1159             case DBG_ADVANCE_PC:
   1160                 address += readUnsignedLeb128(&stream);
   1161                 break;
   1162 
   1163             case DBG_ADVANCE_LINE:
   1164                 line += readSignedLeb128(&stream);
   1165                 break;
   1166 
   1167             case DBG_START_LOCAL:
   1168             case DBG_START_LOCAL_EXTENDED:
   1169                 reg = readUnsignedLeb128(&stream);
   1170                 if (reg > pCode->registersSize) goto invalid_stream;
   1171 
   1172                 // Emit what was previously there, if anything
   1173                 emitLocalCbIfLive (cnxt, reg, address,
   1174                     localInReg, localCb);
   1175 
   1176                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
   1177                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
   1178                 if (opcode == DBG_START_LOCAL_EXTENDED) {
   1179                     localInReg[reg].signature
   1180                         = readStringIdx(pDexFile, &stream);
   1181                 } else {
   1182                     localInReg[reg].signature = NULL;
   1183                 }
   1184                 localInReg[reg].startAddress = address;
   1185                 localInReg[reg].live = true;
   1186                 break;
   1187 
   1188             case DBG_END_LOCAL:
   1189                 reg = readUnsignedLeb128(&stream);
   1190                 if (reg > pCode->registersSize) goto invalid_stream;
   1191 
   1192                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
   1193                 localInReg[reg].live = false;
   1194                 break;
   1195 
   1196             case DBG_RESTART_LOCAL:
   1197                 reg = readUnsignedLeb128(&stream);
   1198                 if (reg > pCode->registersSize) goto invalid_stream;
   1199 
   1200                 if (localInReg[reg].name == NULL
   1201                         || localInReg[reg].descriptor == NULL) {
   1202                     goto invalid_stream;
   1203                 }
   1204 
   1205                 /*
   1206                  * If the register is live, the "restart" is superfluous,
   1207                  * and we don't want to mess with the existing start address.
   1208                  */
   1209                 if (!localInReg[reg].live) {
   1210                     localInReg[reg].startAddress = address;
   1211                     localInReg[reg].live = true;
   1212                 }
   1213                 break;
   1214 
   1215             case DBG_SET_PROLOGUE_END:
   1216             case DBG_SET_EPILOGUE_BEGIN:
   1217             case DBG_SET_FILE:
   1218                 break;
   1219 
   1220             default: {
   1221                 int adjopcode = opcode - DBG_FIRST_SPECIAL;
   1222 
   1223                 address += adjopcode / DBG_LINE_RANGE;
   1224                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
   1225 
   1226                 if (posCb != NULL) {
   1227                     int done;
   1228                     done = posCb(cnxt, address, line);
   1229 
   1230                     if (done) {
   1231                         // early exit
   1232                         goto end;
   1233                     }
   1234                 }
   1235                 break;
   1236             }
   1237         }
   1238     }
   1239 
   1240 end:
   1241     {
   1242         int reg;
   1243         for (reg = 0; reg < pCode->registersSize; reg++) {
   1244             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
   1245         }
   1246     }
   1247     return;
   1248 
   1249 invalid_stream:
   1250     IF_LOGE() {
   1251         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
   1252         LOGE("Invalid debug info stream. class %s; proto %s",
   1253                 classDescriptor, methodDescriptor);
   1254         free(methodDescriptor);
   1255     }
   1256 }
   1257 
   1258