Home | History | Annotate | Download | only in libdex
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Access the contents of a .dex file.
     19  */
     20 
#include "DexFile.h"
#include "DexOptData.h"
#include "DexProto.h"
#include "DexCatch.h"
#include "Leb128.h"
#include "sha1.h"
#include "ZipArchive.h"

#include <zlib.h>

#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <limits.h>
     36 
     37 
/*
 * Verifying checksums is good, but it slows things down and causes us to
 * touch every page.  In the "optimized" world, it doesn't work at all,
 * because we rewrite the contents.
 *
 * Both switches are compiled in as "off".  kVerifySignature gates the
 * SHA-1 digest comparison in dexFileParse().
 *
 * NOTE(review): kVerifyChecksum is not referenced in the part of the file
 * visible here; dexFileParse() decides on checksum verification from the
 * kDexParseVerifyChecksum flag instead -- confirm whether it is still used.
 */
static const bool kVerifyChecksum = false;
static const bool kVerifySignature = false;
     45 
     46 
     47 /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
     48  * code point values for comparison. This treats different encodings
     49  * for the same code point as equivalent, except that only a real '\0'
     50  * byte is considered the string terminator. The return value is as
     51  * for strcmp(). */
     52 int dexUtf8Cmp(const char* s1, const char* s2) {
     53     for (;;) {
     54         if (*s1 == '\0') {
     55             if (*s2 == '\0') {
     56                 return 0;
     57             }
     58             return -1;
     59         } else if (*s2 == '\0') {
     60             return 1;
     61         }
     62 
     63         int utf1 = dexGetUtf16FromUtf8(&s1);
     64         int utf2 = dexGetUtf16FromUtf8(&s2);
     65         int diff = utf1 - utf2;
     66 
     67         if (diff != 0) {
     68             return diff;
     69         }
     70     }
     71 }
     72 
/*
 * for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii
 *
 * Each word covers 32 consecutive character codes.  Going by the constants
 * and the per-word notes below, bit N (counting from the least significant
 * bit) of word K stands for character code K * 32 + N, and a set bit marks
 * that character as acceptable in a member name.
 */
u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
    0x00000000, // 00..1f low control characters; nothing valid
    0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
    0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
    0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
};
     80 
     81 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
     82 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
     83     /*
     84      * It's a multibyte encoded character. Decode it and analyze. We
     85      * accept anything that isn't (a) an improperly encoded low value,
     86      * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
     87      * control character, or (e) a high space, layout, or special
     88      * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
     89      * U+fff0..U+ffff).
     90      */
     91 
     92     u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
     93 
     94     // Perform follow-up tests based on the high 8 bits.
     95     switch (utf16 >> 8) {
     96         case 0x00: {
     97             // It's only valid if it's above the ISO-8859-1 high space (0xa0).
     98             return (utf16 > 0x00a0);
     99         }
    100         case 0xd8:
    101         case 0xd9:
    102         case 0xda:
    103         case 0xdb: {
    104             /*
    105              * It's a leading surrogate. Check to see that a trailing
    106              * surrogate follows.
    107              */
    108             utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
    109             return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
    110         }
    111         case 0xdc:
    112         case 0xdd:
    113         case 0xde:
    114         case 0xdf: {
    115             // It's a trailing surrogate, which is not valid at this point.
    116             return false;
    117         }
    118         case 0x20:
    119         case 0xff: {
    120             // It's in the range that has spaces, controls, and specials.
    121             switch (utf16 & 0xfff8) {
    122                 case 0x2000:
    123                 case 0x2008:
    124                 case 0x2028:
    125                 case 0xfff0:
    126                 case 0xfff8: {
    127                     return false;
    128                 }
    129             }
    130             break;
    131         }
    132     }
    133 
    134     return true;
    135 }
    136 
    137 /* Return whether the given string is a valid field or method name. */
    138 bool dexIsValidMemberName(const char* s) {
    139     bool angleName = false;
    140 
    141     switch (*s) {
    142         case '\0': {
    143             // The empty string is not a valid name.
    144             return false;
    145         }
    146         case '<': {
    147             /*
    148              * '<' is allowed only at the start of a name, and if present,
    149              * means that the name must end with '>'.
    150              */
    151             angleName = true;
    152             s++;
    153             break;
    154         }
    155     }
    156 
    157     for (;;) {
    158         switch (*s) {
    159             case '\0': {
    160                 return !angleName;
    161             }
    162             case '>': {
    163                 return angleName && s[1] == '\0';
    164             }
    165         }
    166         if (!dexIsValidMemberNameUtf8(&s)) {
    167             return false;
    168         }
    169     }
    170 }
    171 
    172 /* Return whether the given string is a valid type descriptor. */
    173 bool dexIsValidTypeDescriptor(const char* s) {
    174     int arrayCount = 0;
    175 
    176     while (*s == '[') {
    177         arrayCount++;
    178         s++;
    179     }
    180 
    181     if (arrayCount > 255) {
    182         // Arrays may have no more than 255 dimensions.
    183         return false;
    184     }
    185 
    186     switch (*(s++)) {
    187         case 'B':
    188         case 'C':
    189         case 'D':
    190         case 'F':
    191         case 'I':
    192         case 'J':
    193         case 'S':
    194         case 'Z': {
    195             // These are all single-character descriptors for primitive types.
    196             return (*s == '\0');
    197         }
    198         case 'V': {
    199             // You can't have an array of void.
    200             return (arrayCount == 0) && (*s == '\0');
    201         }
    202         case 'L': {
    203             // Break out and continue below.
    204             break;
    205         }
    206         default: {
    207             // Oddball descriptor character.
    208             return false;
    209         }
    210     }
    211 
    212     // We just consumed the 'L' that introduces a class name.
    213 
    214     bool slashOrFirst = true; // first character or just encountered a slash
    215     for (;;) {
    216         u1 c = (u1) *s;
    217         switch (c) {
    218             case '\0': {
    219                 // Premature end.
    220                 return false;
    221             }
    222             case ';': {
    223                 /*
    224                  * Make sure that this is the end of the string and that
    225                  * it doesn't end with an empty component (including the
    226                  * degenerate case of "L;").
    227                  */
    228                 return (s[1] == '\0') && !slashOrFirst;
    229             }
    230             case '/': {
    231                 if (slashOrFirst) {
    232                     // Slash at start or two slashes in a row.
    233                     return false;
    234                 }
    235                 slashOrFirst = true;
    236                 s++;
    237                 break;
    238             }
    239             default: {
    240                 if (!dexIsValidMemberNameUtf8(&s)) {
    241                     return false;
    242                 }
    243                 slashOrFirst = false;
    244                 break;
    245             }
    246         }
    247     }
    248 }
    249 
    250 /* Return whether the given string is a valid reference descriptor. This
    251  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    252  * is for a class or array and not a primitive type. */
    253 bool dexIsReferenceDescriptor(const char* s) {
    254     if (!dexIsValidTypeDescriptor(s)) {
    255         return false;
    256     }
    257 
    258     return (s[0] == 'L') || (s[0] == '[');
    259 }
    260 
    261 /* Return whether the given string is a valid class descriptor. This
    262  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    263  * is for a class and not an array or primitive type. */
    264 bool dexIsClassDescriptor(const char* s) {
    265     if (!dexIsValidTypeDescriptor(s)) {
    266         return false;
    267     }
    268 
    269     return s[0] == 'L';
    270 }
    271 
    272 /* Return whether the given string is a valid field type descriptor. This
    273  * is true if dexIsValidTypeDescriptor() returns true and the descriptor
    274  * is for anything but "void". */
    275 bool dexIsFieldDescriptor(const char* s) {
    276     if (!dexIsValidTypeDescriptor(s)) {
    277         return false;
    278     }
    279 
    280     return s[0] != 'V';
    281 }
    282 
    283 /* Return the UTF-8 encoded string with the specified string_id index,
    284  * also filling in the UTF-16 size (number of 16-bit code points).*/
    285 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
    286         u4* utf16Size) {
    287     const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
    288     const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
    289 
    290     *utf16Size = readUnsignedLeb128(&ptr);
    291     return (const char*) ptr;
    292 }
    293 
/*
 * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
 * least kSHA1DigestOutputLen bytes.
 *
 * NOTE(review): this is a declaration only; no definition or caller
 * appears in the portion of the file visible here -- confirm it is still
 * needed.
 */
const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
    299 
    300 /*
    301  * Compute a SHA-1 digest on a range of bytes.
    302  */
    303 static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
    304     unsigned char digest[])
    305 {
    306     SHA1_CTX context;
    307     SHA1Init(&context);
    308     SHA1Update(&context, data, length);
    309     SHA1Final(digest, &context);
    310 }
    311 
    312 /*
    313  * Format the SHA-1 digest into the buffer, which must be able to hold at
    314  * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
    315  */
    316 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
    317 {
    318     static const char hexDigit[] = "0123456789abcdef";
    319     char* cp;
    320     int i;
    321 
    322     cp = tmpBuf;
    323     for (i = 0; i < kSHA1DigestLen; i++) {
    324         *cp++ = hexDigit[digest[i] >> 4];
    325         *cp++ = hexDigit[digest[i] & 0x0f];
    326     }
    327     *cp++ = '\0';
    328 
    329     assert(cp == tmpBuf + kSHA1DigestOutputLen);
    330 
    331     return tmpBuf;
    332 }
    333 
    334 /*
    335  * Compute a hash code on a UTF-8 string, for use with internal hash tables.
    336  *
    337  * This may or may not be compatible with UTF-8 hash functions used inside
    338  * the Dalvik VM.
    339  *
    340  * The basic "multiply by 31 and add" approach does better on class names
    341  * than most other things tried (e.g. adler32).
    342  */
    343 static u4 classDescriptorHash(const char* str)
    344 {
    345     u4 hash = 1;
    346 
    347     while (*str != '\0')
    348         hash = hash * 31 + *str++;
    349 
    350     return hash;
    351 }
    352 
    353 /*
    354  * Add an entry to the class lookup table.  We hash the string and probe
    355  * until we find an open slot.
    356  */
    357 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
    358     int stringOff, int classDefOff, int* pNumProbes)
    359 {
    360     const char* classDescriptor =
    361         (const char*) (pDexFile->baseAddr + stringOff);
    362     const DexClassDef* pClassDef =
    363         (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
    364     u4 hash = classDescriptorHash(classDescriptor);
    365     int mask = pLookup->numEntries-1;
    366     int idx = hash & mask;
    367 
    368     /*
    369      * Find the first empty slot.  We oversized the table, so this is
    370      * guaranteed to finish.
    371      */
    372     int probes = 0;
    373     while (pLookup->table[idx].classDescriptorOffset != 0) {
    374         idx = (idx + 1) & mask;
    375         probes++;
    376     }
    377     //if (probes > 1)
    378     //    LOGW("classLookupAdd: probes=%d\n", probes);
    379 
    380     pLookup->table[idx].classDescriptorHash = hash;
    381     pLookup->table[idx].classDescriptorOffset = stringOff;
    382     pLookup->table[idx].classDefOffset = classDefOff;
    383     *pNumProbes = probes;
    384 }
    385 
    386 /*
    387  * Round up to the next highest power of 2.
    388  *
    389  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    390  */
    391 u4 dexRoundUpPower2(u4 val)
    392 {
    393     val--;
    394     val |= val >> 1;
    395     val |= val >> 2;
    396     val |= val >> 4;
    397     val |= val >> 8;
    398     val |= val >> 16;
    399     val++;
    400 
    401     return val;
    402 }
    403 
    404 /*
    405  * Create the class lookup hash table.
    406  *
    407  * Returns newly-allocated storage.
    408  */
    409 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
    410 {
    411     DexClassLookup* pLookup;
    412     int allocSize;
    413     int i, numEntries;
    414     int numProbes, totalProbes, maxProbes;
    415 
    416     numProbes = totalProbes = maxProbes = 0;
    417 
    418     assert(pDexFile != NULL);
    419 
    420     /*
    421      * Using a factor of 3 results in far less probing than a factor of 2,
    422      * but almost doubles the flash storage requirements for the bootstrap
    423      * DEX files.  The overall impact on class loading performance seems
    424      * to be minor.  We could probably get some performance improvement by
    425      * using a secondary hash.
    426      */
    427     numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
    428     allocSize = offsetof(DexClassLookup, table)
    429                     + numEntries * sizeof(pLookup->table[0]);
    430 
    431     pLookup = (DexClassLookup*) calloc(1, allocSize);
    432     if (pLookup == NULL)
    433         return NULL;
    434     pLookup->size = allocSize;
    435     pLookup->numEntries = numEntries;
    436 
    437     for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
    438         const DexClassDef* pClassDef;
    439         const char* pString;
    440 
    441         pClassDef = dexGetClassDef(pDexFile, i);
    442         pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
    443 
    444         classLookupAdd(pDexFile, pLookup,
    445             (u1*)pString - pDexFile->baseAddr,
    446             (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
    447 
    448         if (numProbes > maxProbes)
    449             maxProbes = numProbes;
    450         totalProbes += numProbes;
    451     }
    452 
    453     LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
    454          " total=%d max=%d\n",
    455         pDexFile->pHeader->classDefsSize, numEntries,
    456         (100 * pDexFile->pHeader->classDefsSize) / numEntries,
    457         allocSize, totalProbes, maxProbes);
    458 
    459     return pLookup;
    460 }
    461 
    462 
    463 /*
    464  * Set up the basic raw data pointers of a DexFile. This function isn't
    465  * meant for general use.
    466  */
    467 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
    468     DexHeader *pHeader = (DexHeader*) data;
    469 
    470     pDexFile->baseAddr = data;
    471     pDexFile->pHeader = pHeader;
    472     pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
    473     pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
    474     pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
    475     pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
    476     pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
    477     pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
    478     pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
    479 }
    480 
    481 /*
    482  * Parse an optimized or unoptimized .dex file sitting in memory.  This is
    483  * called after the byte-ordering and structure alignment has been fixed up.
    484  *
    485  * On success, return a newly-allocated DexFile.
    486  */
    487 DexFile* dexFileParse(const u1* data, size_t length, int flags)
    488 {
    489     DexFile* pDexFile = NULL;
    490     const DexHeader* pHeader;
    491     const u1* magic;
    492     int result = -1;
    493 
    494     if (length < sizeof(DexHeader)) {
    495         LOGE("too short to be a valid .dex\n");
    496         goto bail;      /* bad file format */
    497     }
    498 
    499     pDexFile = (DexFile*) malloc(sizeof(DexFile));
    500     if (pDexFile == NULL)
    501         goto bail;      /* alloc failure */
    502     memset(pDexFile, 0, sizeof(DexFile));
    503 
    504     /*
    505      * Peel off the optimized header.
    506      */
    507     if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
    508         magic = data;
    509         if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
    510             LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
    511                  magic[4], magic[5], magic[6], magic[7]);
    512             goto bail;
    513         }
    514 
    515         pDexFile->pOptHeader = (const DexOptHeader*) data;
    516         LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
    517             pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
    518 
    519         /* parse the optimized dex file tables */
    520         if (!dexParseOptData(data, length, pDexFile))
    521             goto bail;
    522 
    523         /* ignore the opt header and appended data from here on out */
    524         data += pDexFile->pOptHeader->dexOffset;
    525         length -= pDexFile->pOptHeader->dexOffset;
    526         if (pDexFile->pOptHeader->dexLength > length) {
    527             LOGE("File truncated? stored len=%d, rem len=%d\n",
    528                 pDexFile->pOptHeader->dexLength, (int) length);
    529             goto bail;
    530         }
    531         length = pDexFile->pOptHeader->dexLength;
    532     }
    533 
    534     dexFileSetupBasicPointers(pDexFile, data);
    535     pHeader = pDexFile->pHeader;
    536 
    537     magic = pHeader->magic;
    538     if (memcmp(magic, DEX_MAGIC, 4) != 0) {
    539         /* not expected */
    540         LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
    541              magic[0], magic[1], magic[2], magic[3]);
    542         goto bail;
    543     }
    544     if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
    545         LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
    546              magic[4], magic[5], magic[6], magic[7]);
    547         goto bail;
    548     }
    549 
    550     /*
    551      * Verify the checksum(s).  This is reasonably quick, but does require
    552      * touching every byte in the DEX file.  The base checksum changes after
    553      * byte-swapping and DEX optimization.
    554      */
    555     if (flags & kDexParseVerifyChecksum) {
    556         u4 adler = dexComputeChecksum(pHeader);
    557         if (adler != pHeader->checksum) {
    558             LOGE("ERROR: bad checksum (%08x vs %08x)\n",
    559                 adler, pHeader->checksum);
    560             if (!(flags & kDexParseContinueOnError))
    561                 goto bail;
    562         } else {
    563             LOGV("+++ adler32 checksum (%08x) verified\n", adler);
    564         }
    565 
    566         const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
    567         if (pOptHeader != NULL) {
    568             adler = dexComputeOptChecksum(pOptHeader);
    569             if (adler != pOptHeader->checksum) {
    570                 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
    571                     adler, pOptHeader->checksum);
    572                 if (!(flags & kDexParseContinueOnError))
    573                     goto bail;
    574             } else {
    575                 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
    576             }
    577         }
    578     }
    579 
    580     /*
    581      * Verify the SHA-1 digest.  (Normally we don't want to do this --
    582      * the digest is used to uniquely identify the original DEX file, and
    583      * can't be computed for verification after the DEX is byte-swapped
    584      * and optimized.)
    585      */
    586     if (kVerifySignature) {
    587         unsigned char sha1Digest[kSHA1DigestLen];
    588         const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
    589                             kSHA1DigestLen;
    590 
    591         dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
    592         if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
    593             char tmpBuf1[kSHA1DigestOutputLen];
    594             char tmpBuf2[kSHA1DigestOutputLen];
    595             LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
    596                 dexSHA1DigestToStr(sha1Digest, tmpBuf1),
    597                 dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
    598             if (!(flags & kDexParseContinueOnError))
    599                 goto bail;
    600         } else {
    601             LOGV("+++ sha1 digest verified\n");
    602         }
    603     }
    604 
    605     if (pHeader->fileSize != length) {
    606         LOGE("ERROR: stored file size (%d) != expected (%d)\n",
    607             (int) pHeader->fileSize, (int) length);
    608         if (!(flags & kDexParseContinueOnError))
    609             goto bail;
    610     }
    611 
    612     if (pHeader->classDefsSize == 0) {
    613         LOGE("ERROR: DEX file has no classes in it, failing\n");
    614         goto bail;
    615     }
    616 
    617     /*
    618      * Success!
    619      */
    620     result = 0;
    621 
    622 bail:
    623     if (result != 0 && pDexFile != NULL) {
    624         dexFileFree(pDexFile);
    625         pDexFile = NULL;
    626     }
    627     return pDexFile;
    628 }
    629 
    630 /*
    631  * Free up the DexFile and any associated data structures.
    632  *
    633  * Note we may be called with a partially-initialized DexFile.
    634  */
    635 void dexFileFree(DexFile* pDexFile)
    636 {
    637     if (pDexFile == NULL)
    638         return;
    639 
    640     free(pDexFile);
    641 }
    642 
    643 /*
    644  * Look up a class definition entry by descriptor.
    645  *
    646  * "descriptor" should look like "Landroid/debug/Stuff;".
    647  */
    648 const DexClassDef* dexFindClass(const DexFile* pDexFile,
    649     const char* descriptor)
    650 {
    651     const DexClassLookup* pLookup = pDexFile->pClassLookup;
    652     u4 hash;
    653     int idx, mask;
    654 
    655     hash = classDescriptorHash(descriptor);
    656     mask = pLookup->numEntries - 1;
    657     idx = hash & mask;
    658 
    659     /*
    660      * Search until we find a matching entry or an empty slot.
    661      */
    662     while (true) {
    663         int offset;
    664 
    665         offset = pLookup->table[idx].classDescriptorOffset;
    666         if (offset == 0)
    667             return NULL;
    668 
    669         if (pLookup->table[idx].classDescriptorHash == hash) {
    670             const char* str;
    671 
    672             str = (const char*) (pDexFile->baseAddr + offset);
    673             if (strcmp(str, descriptor) == 0) {
    674                 return (const DexClassDef*)
    675                     (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
    676             }
    677         }
    678 
    679         idx = (idx + 1) & mask;
    680     }
    681 }
    682 
    683 
    684 /*
    685  * Compute the DEX file checksum for a memory-mapped DEX file.
    686  */
    687 u4 dexComputeChecksum(const DexHeader* pHeader)
    688 {
    689     const u1* start = (const u1*) pHeader;
    690 
    691     uLong adler = adler32(0L, Z_NULL, 0);
    692     const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
    693 
    694     return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
    695 }
    696 
    697 /*
    698  * Compute the size, in bytes, of a DexCode.
    699  */
    700 size_t dexGetDexCodeSize(const DexCode* pCode)
    701 {
    702     /*
    703      * The catch handler data is the last entry.  It has a variable number
    704      * of variable-size pieces, so we need to create an iterator.
    705      */
    706     u4 handlersSize;
    707     u4 offset;
    708     u4 ui;
    709 
    710     if (pCode->triesSize != 0) {
    711         handlersSize = dexGetHandlersSize(pCode);
    712         offset = dexGetFirstHandlerOffset(pCode);
    713     } else {
    714         handlersSize = 0;
    715         offset = 0;
    716     }
    717 
    718     for (ui = 0; ui < handlersSize; ui++) {
    719         DexCatchIterator iterator;
    720         dexCatchIteratorInit(&iterator, pCode, offset);
    721         offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
    722     }
    723 
    724     const u1* handlerData = dexGetCatchHandlerData(pCode);
    725 
    726     //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
    727     //    pCode, handlerData, offset);
    728 
    729     /* return the size of the catch handler + everything before it */
    730     return (handlerData - (u1*) pCode) + offset;
    731 }
    732 
    733 
    734 /*
    735  * ===========================================================================
    736  *      Debug info
    737  * ===========================================================================
    738  */
    739 
    740 /*
    741  * Decode the arguments in a method signature, which looks something
    742  * like "(ID[Ljava/lang/String;)V".
    743  *
    744  * Returns the type signature letter for the next argument, or ')' if
    745  * there are no more args.  Advances "pSig" to point to the character
    746  * after the one returned.
    747  */
    748 static char decodeSignature(const char** pSig)
    749 {
    750     const char* sig = *pSig;
    751 
    752     if (*sig == '(')
    753         sig++;
    754 
    755     if (*sig == 'L') {
    756         /* object ref */
    757         while (*++sig != ';')
    758             ;
    759         *pSig = sig+1;
    760         return 'L';
    761     }
    762     if (*sig == '[') {
    763         /* array; advance past array type */
    764         while (*++sig == '[')
    765             ;
    766         if (*sig == 'L') {
    767             while (*++sig != ';')
    768                 ;
    769         }
    770         *pSig = sig+1;
    771         return '[';
    772     }
    773     if (*sig == '\0')
    774         return *sig;        /* don't advance further */
    775 
    776     *pSig = sig+1;
    777     return *sig;
    778 }
    779 
    780 /*
    781  * returns the length of a type string, given the start of the
    782  * type string. Used for the case where the debug info format
    783  * references types that are inside a method type signature.
    784  */
    785 static int typeLength (const char *type) {
    786     // Assumes any leading '(' has already been gobbled
    787     const char *end = type;
    788     decodeSignature(&end);
    789     return end - type;
    790 }
    791 
    792 /*
    793  * Reads a string index as encoded for the debug info format,
    794  * returning a string pointer or NULL as appropriate.
    795  */
    796 static const char* readStringIdx(const DexFile* pDexFile,
    797         const u1** pStream) {
    798     u4 stringIdx = readUnsignedLeb128(pStream);
    799 
    800     // Remember, encoded string indicies have 1 added to them.
    801     if (stringIdx == 0) {
    802         return NULL;
    803     } else {
    804         return dexStringById(pDexFile, stringIdx - 1);
    805     }
    806 }
    807 
    808 /*
    809  * Reads a type index as encoded for the debug info format, returning
    810  * a string pointer for its descriptor or NULL as appropriate.
    811  */
    812 static const char* readTypeIdx(const DexFile* pDexFile,
    813         const u1** pStream) {
    814     u4 typeIdx = readUnsignedLeb128(pStream);
    815 
    816     // Remember, encoded type indicies have 1 added to them.
    817     if (typeIdx == 0) {
    818         return NULL;
    819     } else {
    820         return dexStringByTypeIdx(pDexFile, typeIdx - 1);
    821     }
    822 }
    823 
/*
 * access_flag value indicating that a method is static.
 *
 * dexDecodeDebugInfo() tests this bit in its accessFlags argument: when it
 * is clear, the method is treated as an instance method whose first
 * argument register holds the implicit "this".
 */
#define ACC_STATIC              0x0008
    826 
    827 typedef struct LocalInfo {
    828     const char *name;
    829     const char *descriptor;
    830     const char *signature;
    831     u2 startAddress;
    832     bool live;
    833 } LocalInfo;
    834 
    835 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
    836         LocalInfo *localInReg, DexDebugNewLocalCb localCb)
    837 {
    838     if (localCb != NULL && localInReg[reg].live) {
    839         localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
    840                 localInReg[reg].name,
    841                 localInReg[reg].descriptor,
    842                 localInReg[reg].signature == NULL
    843                 ? "" : localInReg[reg].signature );
    844     }
    845 }
    846 
    847 // TODO optimize localCb == NULL case
    848 void dexDecodeDebugInfo(
    849             const DexFile* pDexFile,
    850             const DexCode* pCode,
    851             const char* classDescriptor,
    852             u4 protoIdx,
    853             u4 accessFlags,
    854             DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
    855             void* cnxt)
    856 {
    857     const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
    858     u4 line;
    859     u4 parametersSize;
    860     u4 address = 0;
    861     LocalInfo localInReg[pCode->registersSize];
    862     u4 insnsSize = pCode->insnsSize;
    863     DexProto proto = { pDexFile, protoIdx };
    864 
    865     memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
    866 
    867     if (stream == NULL) {
    868         goto end;
    869     }
    870 
    871     line = readUnsignedLeb128(&stream);
    872     parametersSize = readUnsignedLeb128(&stream);
    873 
    874     u2 argReg = pCode->registersSize - pCode->insSize;
    875 
    876     if ((accessFlags & ACC_STATIC) == 0) {
    877         /*
    878          * The code is an instance method, which means that there is
    879          * an initial this parameter. Also, the proto list should
    880          * contain exactly one fewer argument word than the insSize
    881          * indicates.
    882          */
    883         assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
    884         localInReg[argReg].name = "this";
    885         localInReg[argReg].descriptor = classDescriptor;
    886         localInReg[argReg].startAddress = 0;
    887         localInReg[argReg].live = true;
    888         argReg++;
    889     } else {
    890         assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
    891     }
    892 
    893     DexParameterIterator iterator;
    894     dexParameterIteratorInit(&iterator, &proto);
    895 
    896     while (parametersSize-- != 0) {
    897         const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
    898         const char *name;
    899         int reg;
    900 
    901         if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
    902             goto invalid_stream;
    903         }
    904 
    905         name = readStringIdx(pDexFile, &stream);
    906         reg = argReg;
    907 
    908         switch (descriptor[0]) {
    909             case 'D':
    910             case 'J':
    911                 argReg += 2;
    912                 break;
    913             default:
    914                 argReg += 1;
    915                 break;
    916         }
    917 
    918         if (name != NULL) {
    919             localInReg[reg].name = name;
    920             localInReg[reg].descriptor = descriptor;
    921             localInReg[reg].signature = NULL;
    922             localInReg[reg].startAddress = address;
    923             localInReg[reg].live = true;
    924         }
    925     }
    926 
    927     for (;;)  {
    928         u1 opcode = *stream++;
    929         u2 reg;
    930 
    931         switch (opcode) {
    932             case DBG_END_SEQUENCE:
    933                 goto end;
    934 
    935             case DBG_ADVANCE_PC:
    936                 address += readUnsignedLeb128(&stream);
    937                 break;
    938 
    939             case DBG_ADVANCE_LINE:
    940                 line += readSignedLeb128(&stream);
    941                 break;
    942 
    943             case DBG_START_LOCAL:
    944             case DBG_START_LOCAL_EXTENDED:
    945                 reg = readUnsignedLeb128(&stream);
    946                 if (reg > pCode->registersSize) goto invalid_stream;
    947 
    948                 // Emit what was previously there, if anything
    949                 emitLocalCbIfLive (cnxt, reg, address,
    950                     localInReg, localCb);
    951 
    952                 localInReg[reg].name = readStringIdx(pDexFile, &stream);
    953                 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
    954                 if (opcode == DBG_START_LOCAL_EXTENDED) {
    955                     localInReg[reg].signature
    956                         = readStringIdx(pDexFile, &stream);
    957                 } else {
    958                     localInReg[reg].signature = NULL;
    959                 }
    960                 localInReg[reg].startAddress = address;
    961                 localInReg[reg].live = true;
    962                 break;
    963 
    964             case DBG_END_LOCAL:
    965                 reg = readUnsignedLeb128(&stream);
    966                 if (reg > pCode->registersSize) goto invalid_stream;
    967 
    968                 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
    969                 localInReg[reg].live = false;
    970                 break;
    971 
    972             case DBG_RESTART_LOCAL:
    973                 reg = readUnsignedLeb128(&stream);
    974                 if (reg > pCode->registersSize) goto invalid_stream;
    975 
    976                 if (localInReg[reg].name == NULL
    977                         || localInReg[reg].descriptor == NULL) {
    978                     goto invalid_stream;
    979                 }
    980 
    981                 /*
    982                  * If the register is live, the "restart" is superfluous,
    983                  * and we don't want to mess with the existing start address.
    984                  */
    985                 if (!localInReg[reg].live) {
    986                     localInReg[reg].startAddress = address;
    987                     localInReg[reg].live = true;
    988                 }
    989                 break;
    990 
    991             case DBG_SET_PROLOGUE_END:
    992             case DBG_SET_EPILOGUE_BEGIN:
    993             case DBG_SET_FILE:
    994                 break;
    995 
    996             default: {
    997                 int adjopcode = opcode - DBG_FIRST_SPECIAL;
    998 
    999                 address += adjopcode / DBG_LINE_RANGE;
   1000                 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
   1001 
   1002                 if (posCb != NULL) {
   1003                     int done;
   1004                     done = posCb(cnxt, address, line);
   1005 
   1006                     if (done) {
   1007                         // early exit
   1008                         goto end;
   1009                     }
   1010                 }
   1011                 break;
   1012             }
   1013         }
   1014     }
   1015 
   1016 end:
   1017     {
   1018         int reg;
   1019         for (reg = 0; reg < pCode->registersSize; reg++) {
   1020             emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
   1021         }
   1022     }
   1023     return;
   1024 
   1025 invalid_stream:
   1026     IF_LOGE() {
   1027         char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
   1028         LOGE("Invalid debug info stream. class %s; proto %s",
   1029                 classDescriptor, methodDescriptor);
   1030         free(methodDescriptor);
   1031     }
   1032 }
   1033