1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Access the contents of a .dex file. 19 */ 20 21 #include "DexFile.h" 22 #include "DexProto.h" 23 #include "DexCatch.h" 24 #include "Leb128.h" 25 #include "sha1.h" 26 #include "ZipArchive.h" 27 28 #include <zlib.h> 29 30 #include <stdlib.h> 31 #include <stddef.h> 32 #include <string.h> 33 #include <fcntl.h> 34 #include <errno.h> 35 36 // fwd 37 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader); 38 39 40 /* 41 * Verifying checksums is good, but it slows things down and causes us to 42 * touch every page. In the "optimized" world, it doesn't work at all, 43 * because we rewrite the contents. 44 */ 45 static const bool kVerifyChecksum = false; 46 static const bool kVerifySignature = false; 47 48 49 /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode 50 * code point values for comparison. This treats different encodings 51 * for the same code point as equivalent, except that only a real '\0' 52 * byte is considered the string terminator. The return value is as 53 * for strcmp(). */ 54 int dexUtf8Cmp(const char* s1, const char* s2) { 55 for (;;) { 56 if (*s1 == '\0') { 57 if (*s2 == '\0') { 58 return 0; 59 } 60 return -1; 61 } else if (*s2 == '\0') { 62 return 1; 63 } 64 65 int utf1 = dexGetUtf16FromUtf8(&s1); 66 int utf2 = dexGetUtf16FromUtf8(&s2); 67 int diff = utf1 - utf2; 68 69 if (diff != 0) { 70 return diff; 71 } 72 } 73 } 74 75 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ 76 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { 77 0x00000000, // 00..1f low control characters; nothing valid 78 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' 79 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' 80 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' 81 }; 82 83 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ 84 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { 85 /* 86 * It's a multibyte encoded character. Decode it and analyze. We 87 * accept anything that isn't (a) an improperly encoded low value, 88 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high 89 * control character, or (e) a high space, layout, or special 90 * character (U+00a0, U+2000..U+200f, U+2028..U+202f, 91 * U+fff0..U+ffff). 92 */ 93 94 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 95 96 // Perform follow-up tests based on the high 8 bits. 97 switch (utf16 >> 8) { 98 case 0x00: { 99 // It's only valid if it's above the ISO-8859-1 high space (0xa0). 100 return (utf16 > 0x00a0); 101 } 102 case 0xd8: 103 case 0xd9: 104 case 0xda: 105 case 0xdb: { 106 /* 107 * It's a leading surrogate. Check to see that a trailing 108 * surrogate follows. 109 */ 110 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 111 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); 112 } 113 case 0xdc: 114 case 0xdd: 115 case 0xde: 116 case 0xdf: { 117 // It's a trailing surrogate, which is not valid at this point. 118 return false; 119 } 120 case 0x20: 121 case 0xff: { 122 // It's in the range that has spaces, controls, and specials. 123 switch (utf16 & 0xfff8) { 124 case 0x2000: 125 case 0x2008: 126 case 0x2028: 127 case 0xfff0: 128 case 0xfff8: { 129 return false; 130 } 131 } 132 break; 133 } 134 } 135 136 return true; 137 } 138 139 /* Return whether the given string is a valid field or method name. */ 140 bool dexIsValidMemberName(const char* s) { 141 bool angleName = false; 142 143 switch (*s) { 144 case '\0': { 145 // The empty string is not a valid name. 146 return false; 147 } 148 case '<': { 149 /* 150 * '<' is allowed only at the start of a name, and if present, 151 * means that the name must end with '>'. 152 */ 153 angleName = true; 154 s++; 155 break; 156 } 157 } 158 159 for (;;) { 160 switch (*s) { 161 case '\0': { 162 return !angleName; 163 } 164 case '>': { 165 return angleName && s[1] == '\0'; 166 } 167 } 168 if (!dexIsValidMemberNameUtf8(&s)) { 169 return false; 170 } 171 } 172 } 173 174 /* Return whether the given string is a valid type descriptor. */ 175 bool dexIsValidTypeDescriptor(const char* s) { 176 int arrayCount = 0; 177 178 while (*s == '[') { 179 arrayCount++; 180 s++; 181 } 182 183 if (arrayCount > 255) { 184 // Arrays may have no more than 255 dimensions. 185 return false; 186 } 187 188 switch (*(s++)) { 189 case 'B': 190 case 'C': 191 case 'D': 192 case 'F': 193 case 'I': 194 case 'J': 195 case 'S': 196 case 'Z': { 197 // These are all single-character descriptors for primitive types. 198 return (*s == '\0'); 199 } 200 case 'V': { 201 // You can't have an array of void. 202 return (arrayCount == 0) && (*s == '\0'); 203 } 204 case 'L': { 205 // Break out and continue below. 206 break; 207 } 208 default: { 209 // Oddball descriptor character. 210 return false; 211 } 212 } 213 214 // We just consumed the 'L' that introduces a class name. 215 216 bool slashOrFirst = true; // first character or just encountered a slash 217 for (;;) { 218 u1 c = (u1) *s; 219 switch (c) { 220 case '\0': { 221 // Premature end. 222 return false; 223 } 224 case ';': { 225 /* 226 * Make sure that this is the end of the string and that 227 * it doesn't end with an empty component (including the 228 * degenerate case of "L;"). 229 */ 230 return (s[1] == '\0') && !slashOrFirst; 231 } 232 case '/': { 233 if (slashOrFirst) { 234 // Slash at start or two slashes in a row. 235 return false; 236 } 237 slashOrFirst = true; 238 s++; 239 break; 240 } 241 default: { 242 if (!dexIsValidMemberNameUtf8(&s)) { 243 return false; 244 } 245 slashOrFirst = false; 246 break; 247 } 248 } 249 } 250 } 251 252 /* Return whether the given string is a valid reference descriptor. This 253 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 254 * is for a class or array and not a primitive type. */ 255 bool dexIsReferenceDescriptor(const char* s) { 256 if (!dexIsValidTypeDescriptor(s)) { 257 return false; 258 } 259 260 return (s[0] == 'L') || (s[0] == '['); 261 } 262 263 /* Return whether the given string is a valid class descriptor. This 264 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 265 * is for a class and not an array or primitive type. */ 266 bool dexIsClassDescriptor(const char* s) { 267 if (!dexIsValidTypeDescriptor(s)) { 268 return false; 269 } 270 271 return s[0] == 'L'; 272 } 273 274 /* Return whether the given string is a valid field type descriptor. This 275 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 276 * is for anything but "void". */ 277 bool dexIsFieldDescriptor(const char* s) { 278 if (!dexIsValidTypeDescriptor(s)) { 279 return false; 280 } 281 282 return s[0] != 'V'; 283 } 284 285 /* Return the UTF-8 encoded string with the specified string_id index, 286 * also filling in the UTF-16 size (number of 16-bit code points).*/ 287 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 288 u4* utf16Size) { 289 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 290 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 291 292 *utf16Size = readUnsignedLeb128(&ptr); 293 return (const char*) ptr; 294 } 295 296 /* 297 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 298 * least kSHA1DigestOutputLen bytes. 299 */ 300 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 301 302 /* 303 * Compute a SHA-1 digest on a range of bytes. 304 */ 305 static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 306 unsigned char digest[]) 307 { 308 SHA1_CTX context; 309 SHA1Init(&context); 310 SHA1Update(&context, data, length); 311 SHA1Final(digest, &context); 312 } 313 314 /* 315 * Format the SHA-1 digest into the buffer, which must be able to hold at 316 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 317 */ 318 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 319 { 320 static const char hexDigit[] = "0123456789abcdef"; 321 char* cp; 322 int i; 323 324 cp = tmpBuf; 325 for (i = 0; i < kSHA1DigestLen; i++) { 326 *cp++ = hexDigit[digest[i] >> 4]; 327 *cp++ = hexDigit[digest[i] & 0x0f]; 328 } 329 *cp++ = '\0'; 330 331 assert(cp == tmpBuf + kSHA1DigestOutputLen); 332 333 return tmpBuf; 334 } 335 336 /* 337 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 338 * 339 * This may or may not be compatible with UTF-8 hash functions used inside 340 * the Dalvik VM. 341 * 342 * The basic "multiply by 31 and add" approach does better on class names 343 * than most other things tried (e.g. adler32). 344 */ 345 static u4 classDescriptorHash(const char* str) 346 { 347 u4 hash = 1; 348 349 while (*str != '\0') 350 hash = hash * 31 + *str++; 351 352 return hash; 353 } 354 355 /* 356 * Add an entry to the class lookup table. We hash the string and probe 357 * until we find an open slot. 358 */ 359 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 360 int stringOff, int classDefOff, int* pNumProbes) 361 { 362 const char* classDescriptor = 363 (const char*) (pDexFile->baseAddr + stringOff); 364 const DexClassDef* pClassDef = 365 (const DexClassDef*) (pDexFile->baseAddr + classDefOff); 366 u4 hash = classDescriptorHash(classDescriptor); 367 int mask = pLookup->numEntries-1; 368 int idx = hash & mask; 369 370 /* 371 * Find the first empty slot. We oversized the table, so this is 372 * guaranteed to finish. 373 */ 374 int probes = 0; 375 while (pLookup->table[idx].classDescriptorOffset != 0) { 376 idx = (idx + 1) & mask; 377 probes++; 378 } 379 //if (probes > 1) 380 // LOGW("classLookupAdd: probes=%d\n", probes); 381 382 pLookup->table[idx].classDescriptorHash = hash; 383 pLookup->table[idx].classDescriptorOffset = stringOff; 384 pLookup->table[idx].classDefOffset = classDefOff; 385 *pNumProbes = probes; 386 } 387 388 /* 389 * Round up to the next highest power of 2. 390 * 391 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 392 */ 393 u4 dexRoundUpPower2(u4 val) 394 { 395 val--; 396 val |= val >> 1; 397 val |= val >> 2; 398 val |= val >> 4; 399 val |= val >> 8; 400 val |= val >> 16; 401 val++; 402 403 return val; 404 } 405 406 /* 407 * Create the class lookup hash table. 408 * 409 * Returns newly-allocated storage. 410 */ 411 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 412 { 413 DexClassLookup* pLookup; 414 int allocSize; 415 int i, numEntries; 416 int numProbes, totalProbes, maxProbes; 417 418 numProbes = totalProbes = maxProbes = 0; 419 420 assert(pDexFile != NULL); 421 422 /* 423 * Using a factor of 3 results in far less probing than a factor of 2, 424 * but almost doubles the flash storage requirements for the bootstrap 425 * DEX files. The overall impact on class loading performance seems 426 * to be minor. We could probably get some performance improvement by 427 * using a secondary hash. 428 */ 429 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 430 allocSize = offsetof(DexClassLookup, table) 431 + numEntries * sizeof(pLookup->table[0]); 432 433 pLookup = (DexClassLookup*) calloc(1, allocSize); 434 if (pLookup == NULL) 435 return NULL; 436 pLookup->size = allocSize; 437 pLookup->numEntries = numEntries; 438 439 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 440 const DexClassDef* pClassDef; 441 const char* pString; 442 443 pClassDef = dexGetClassDef(pDexFile, i); 444 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 445 446 classLookupAdd(pDexFile, pLookup, 447 (u1*)pString - pDexFile->baseAddr, 448 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 449 450 if (numProbes > maxProbes) 451 maxProbes = numProbes; 452 totalProbes += numProbes; 453 } 454 455 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 456 " total=%d max=%d\n", 457 pDexFile->pHeader->classDefsSize, numEntries, 458 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 459 allocSize, totalProbes, maxProbes); 460 461 return pLookup; 462 } 463 464 465 /* 466 * Set up the basic raw data pointers of a DexFile. This function isn't 467 * meant for general use. 468 */ 469 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 470 DexHeader *pHeader = (DexHeader*) data; 471 472 pDexFile->baseAddr = data; 473 pDexFile->pHeader = pHeader; 474 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 475 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 476 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 477 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 478 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 479 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 480 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 481 } 482 483 484 /* 485 * Parse out an index map entry, advancing "*pData" and reducing "*pSize". 486 */ 487 static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding, 488 u4* pFullCount, u4* pReducedCount, const u2** pMap) 489 { 490 const u4* wordPtr = (const u4*) *pData; 491 u4 size = *pSize; 492 u4 mapCount; 493 494 if (expanding) { 495 if (size < 4) 496 return false; 497 mapCount = *pReducedCount = *wordPtr++; 498 *pFullCount = (u4) -1; 499 size -= sizeof(u4); 500 } else { 501 if (size < 8) 502 return false; 503 mapCount = *pFullCount = *wordPtr++; 504 *pReducedCount = *wordPtr++; 505 size -= sizeof(u4) * 2; 506 } 507 508 u4 mapSize = mapCount * sizeof(u2); 509 510 if (size < mapSize) 511 return false; 512 *pMap = (const u2*) wordPtr; 513 size -= mapSize; 514 515 /* advance the pointer */ 516 const u1* ptr = (const u1*) wordPtr; 517 ptr += (mapSize + 3) & ~0x3; 518 519 /* update pass-by-reference values */ 520 *pData = (const u1*) ptr; 521 *pSize = size; 522 523 return true; 524 } 525 526 /* 527 * Set up some pointers into the mapped data. 528 * 529 * See analysis/ReduceConstants.c for the data layout description. 530 */ 531 static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size, 532 bool expanding) 533 { 534 if (!parseIndexMapEntry(&data, &size, expanding, 535 &pDexFile->indexMap.classFullCount, 536 &pDexFile->indexMap.classReducedCount, 537 &pDexFile->indexMap.classMap)) 538 { 539 return false; 540 } 541 542 if (!parseIndexMapEntry(&data, &size, expanding, 543 &pDexFile->indexMap.methodFullCount, 544 &pDexFile->indexMap.methodReducedCount, 545 &pDexFile->indexMap.methodMap)) 546 { 547 return false; 548 } 549 550 if (!parseIndexMapEntry(&data, &size, expanding, 551 &pDexFile->indexMap.fieldFullCount, 552 &pDexFile->indexMap.fieldReducedCount, 553 &pDexFile->indexMap.fieldMap)) 554 { 555 return false; 556 } 557 558 if (!parseIndexMapEntry(&data, &size, expanding, 559 &pDexFile->indexMap.stringFullCount, 560 &pDexFile->indexMap.stringReducedCount, 561 &pDexFile->indexMap.stringMap)) 562 { 563 return false; 564 } 565 566 if (expanding) { 567 /* 568 * The map includes the "reduced" counts; pull the original counts 569 * out of the DexFile so that code has a consistent source. 570 */ 571 assert(pDexFile->indexMap.classFullCount == (u4) -1); 572 assert(pDexFile->indexMap.methodFullCount == (u4) -1); 573 assert(pDexFile->indexMap.fieldFullCount == (u4) -1); 574 assert(pDexFile->indexMap.stringFullCount == (u4) -1); 575 576 #if 0 // TODO: not available yet -- do later or just skip this 577 pDexFile->indexMap.classFullCount = 578 pDexFile->pHeader->typeIdsSize; 579 pDexFile->indexMap.methodFullCount = 580 pDexFile->pHeader->methodIdsSize; 581 pDexFile->indexMap.fieldFullCount = 582 pDexFile->pHeader->fieldIdsSize; 583 pDexFile->indexMap.stringFullCount = 584 pDexFile->pHeader->stringIdsSize; 585 #endif 586 } 587 588 LOGI("Class : %u %u %u\n", 589 pDexFile->indexMap.classFullCount, 590 pDexFile->indexMap.classReducedCount, 591 pDexFile->indexMap.classMap[0]); 592 LOGI("Method: %u %u %u\n", 593 pDexFile->indexMap.methodFullCount, 594 pDexFile->indexMap.methodReducedCount, 595 pDexFile->indexMap.methodMap[0]); 596 LOGI("Field : %u %u %u\n", 597 pDexFile->indexMap.fieldFullCount, 598 pDexFile->indexMap.fieldReducedCount, 599 pDexFile->indexMap.fieldMap[0]); 600 LOGI("String: %u %u %u\n", 601 pDexFile->indexMap.stringFullCount, 602 pDexFile->indexMap.stringReducedCount, 603 pDexFile->indexMap.stringMap[0]); 604 605 return true; 606 } 607 608 /* 609 * Parse some auxillary data tables. 610 * 611 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup 612 * table. Subsequent versions switched to the "chunk" format. 613 */ 614 static bool parseAuxData(const u1* data, DexFile* pDexFile) 615 { 616 const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset); 617 u4 indexMapType = 0; 618 619 /* v1.0 format? */ 620 if (*pAux == 0) { 621 LOGV("+++ found OLD dex format\n"); 622 pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1); 623 return true; 624 } 625 LOGV("+++ found NEW dex format\n"); 626 627 /* process chunks until we see the end marker */ 628 while (*pAux != kDexChunkEnd) { 629 u4 size = *(pAux+1); 630 u1* data = (u1*) (pAux + 2); 631 632 switch (*pAux) { 633 case kDexChunkClassLookup: 634 pDexFile->pClassLookup = (const DexClassLookup*) data; 635 break; 636 case kDexChunkReducingIndexMap: 637 LOGI("+++ found reducing index map, size=%u\n", size); 638 if (!parseIndexMap(pDexFile, data, size, false)) { 639 LOGE("Failed parsing reducing index map\n"); 640 return false; 641 } 642 indexMapType = *pAux; 643 break; 644 case kDexChunkExpandingIndexMap: 645 LOGI("+++ found expanding index map, size=%u\n", size); 646 if (!parseIndexMap(pDexFile, data, size, true)) { 647 LOGE("Failed parsing expanding index map\n"); 648 return false; 649 } 650 indexMapType = *pAux; 651 break; 652 case kDexChunkRegisterMaps: 653 LOGV("+++ found register maps, size=%u\n", size); 654 pDexFile->pRegisterMapPool = data; 655 break; 656 default: 657 LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n", 658 *pAux, 659 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16), 660 (char) ((*pAux) >> 8), (char) (*pAux), 661 size); 662 break; 663 } 664 665 /* 666 * Advance pointer, padding to 64-bit boundary. The extra "+8" is 667 * for the type/size header. 668 */ 669 size = (size + 8 + 7) & ~7; 670 pAux += size / sizeof(u4); 671 } 672 673 #if 0 // TODO: propagate expected map type from the VM through the API 674 /* 675 * If we're configured to expect an index map, and we don't find one, 676 * reject this DEX so we'll regenerate it. Also, if we found an 677 * "expanding" map but we're not configured to use it, we have to fail 678 * because the constants aren't usable without translation. 679 */ 680 if (indexMapType != expectedIndexMapType) { 681 LOGW("Incompatible index map configuration: found 0x%04x, need %d\n", 682 indexMapType, DVM_REDUCE_CONSTANTS); 683 return false; 684 } 685 #endif 686 687 return true; 688 } 689 690 /* 691 * Parse an optimized or unoptimized .dex file sitting in memory. This is 692 * called after the byte-ordering and structure alignment has been fixed up. 693 * 694 * On success, return a newly-allocated DexFile. 695 */ 696 DexFile* dexFileParse(const u1* data, size_t length, int flags) 697 { 698 DexFile* pDexFile = NULL; 699 const DexHeader* pHeader; 700 const u1* magic; 701 int result = -1; 702 703 if (length < sizeof(DexHeader)) { 704 LOGE("too short to be a valid .dex\n"); 705 goto bail; /* bad file format */ 706 } 707 708 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 709 if (pDexFile == NULL) 710 goto bail; /* alloc failure */ 711 memset(pDexFile, 0, sizeof(DexFile)); 712 713 /* 714 * Peel off the optimized header. 715 */ 716 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 717 magic = data; 718 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 719 LOGE("bad opt version (0x%02x %02x %02x %02x)\n", 720 magic[4], magic[5], magic[6], magic[7]); 721 goto bail; 722 } 723 724 pDexFile->pOptHeader = (const DexOptHeader*) data; 725 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", 726 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 727 728 /* locate some auxillary data tables */ 729 if (!parseAuxData(data, pDexFile)) 730 goto bail; 731 732 /* ignore the opt header and appended data from here on out */ 733 data += pDexFile->pOptHeader->dexOffset; 734 length -= pDexFile->pOptHeader->dexOffset; 735 if (pDexFile->pOptHeader->dexLength > length) { 736 LOGE("File truncated? stored len=%d, rem len=%d\n", 737 pDexFile->pOptHeader->dexLength, (int) length); 738 goto bail; 739 } 740 length = pDexFile->pOptHeader->dexLength; 741 } 742 743 dexFileSetupBasicPointers(pDexFile, data); 744 pHeader = pDexFile->pHeader; 745 746 magic = pHeader->magic; 747 if (memcmp(magic, DEX_MAGIC, 4) != 0) { 748 /* not expected */ 749 LOGE("bad magic number (0x%02x %02x %02x %02x)\n", 750 magic[0], magic[1], magic[2], magic[3]); 751 goto bail; 752 } 753 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { 754 LOGE("bad dex version (0x%02x %02x %02x %02x)\n", 755 magic[4], magic[5], magic[6], magic[7]); 756 goto bail; 757 } 758 759 /* 760 * Verify the checksum(s). This is reasonably quick, but does require 761 * touching every byte in the DEX file. The base checksum changes after 762 * byte-swapping and DEX optimization. 763 */ 764 if (flags & kDexParseVerifyChecksum) { 765 u4 adler = dexComputeChecksum(pHeader); 766 if (adler != pHeader->checksum) { 767 LOGE("ERROR: bad checksum (%08x vs %08x)\n", 768 adler, pHeader->checksum); 769 if (!(flags & kDexParseContinueOnError)) 770 goto bail; 771 } else { 772 LOGV("+++ adler32 checksum (%08x) verified\n", adler); 773 } 774 775 const DexOptHeader* pOptHeader = pDexFile->pOptHeader; 776 if (pOptHeader != NULL) { 777 adler = dexComputeOptChecksum(pOptHeader); 778 if (adler != pOptHeader->checksum) { 779 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n", 780 adler, pOptHeader->checksum); 781 if (!(flags & kDexParseContinueOnError)) 782 goto bail; 783 } else { 784 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler); 785 } 786 } 787 } 788 789 /* 790 * Verify the SHA-1 digest. (Normally we don't want to do this -- 791 * the digest is used to uniquely identify the original DEX file, and 792 * can't be computed for verification after the DEX is byte-swapped 793 * and optimized.) 794 */ 795 if (kVerifySignature) { 796 unsigned char sha1Digest[kSHA1DigestLen]; 797 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 798 kSHA1DigestLen; 799 800 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 801 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 802 char tmpBuf1[kSHA1DigestOutputLen]; 803 char tmpBuf2[kSHA1DigestOutputLen]; 804 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", 805 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 806 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 807 if (!(flags & kDexParseContinueOnError)) 808 goto bail; 809 } else { 810 LOGV("+++ sha1 digest verified\n"); 811 } 812 } 813 814 if (pHeader->fileSize != length) { 815 LOGE("ERROR: stored file size (%d) != expected (%d)\n", 816 (int) pHeader->fileSize, (int) length); 817 if (!(flags & kDexParseContinueOnError)) 818 goto bail; 819 } 820 821 if (pHeader->classDefsSize == 0) { 822 LOGE("ERROR: DEX file has no classes in it, failing\n"); 823 goto bail; 824 } 825 826 /* 827 * Success! 828 */ 829 result = 0; 830 831 bail: 832 if (result != 0 && pDexFile != NULL) { 833 dexFileFree(pDexFile); 834 pDexFile = NULL; 835 } 836 return pDexFile; 837 } 838 839 /* 840 * Free up the DexFile and any associated data structures. 841 * 842 * Note we may be called with a partially-initialized DexFile. 843 */ 844 void dexFileFree(DexFile* pDexFile) 845 { 846 if (pDexFile == NULL) 847 return; 848 849 free(pDexFile); 850 } 851 852 /* 853 * Look up a class definition entry by descriptor. 854 * 855 * "descriptor" should look like "Landroid/debug/Stuff;". 856 */ 857 const DexClassDef* dexFindClass(const DexFile* pDexFile, 858 const char* descriptor) 859 { 860 const DexClassLookup* pLookup = pDexFile->pClassLookup; 861 u4 hash; 862 int idx, mask; 863 864 hash = classDescriptorHash(descriptor); 865 mask = pLookup->numEntries - 1; 866 idx = hash & mask; 867 868 /* 869 * Search until we find a matching entry or an empty slot. 870 */ 871 while (true) { 872 int offset; 873 874 offset = pLookup->table[idx].classDescriptorOffset; 875 if (offset == 0) 876 return NULL; 877 878 if (pLookup->table[idx].classDescriptorHash == hash) { 879 const char* str; 880 881 str = (const char*) (pDexFile->baseAddr + offset); 882 if (strcmp(str, descriptor) == 0) { 883 return (const DexClassDef*) 884 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 885 } 886 } 887 888 idx = (idx + 1) & mask; 889 } 890 } 891 892 893 /* 894 * Compute the DEX file checksum for a memory-mapped DEX file. 895 */ 896 u4 dexComputeChecksum(const DexHeader* pHeader) 897 { 898 const u1* start = (const u1*) pHeader; 899 900 uLong adler = adler32(0L, Z_NULL, 0); 901 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 902 903 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 904 } 905 906 /* 907 * Compute the checksum on the data appended to the DEX file by dexopt. 908 */ 909 static u4 dexComputeOptChecksum(const DexOptHeader* pOptHeader) 910 { 911 const u1* start = (const u1*) pOptHeader + pOptHeader->depsOffset; 912 const u1* end = (const u1*) pOptHeader + 913 pOptHeader->auxOffset + pOptHeader->auxLength; 914 915 uLong adler = adler32(0L, Z_NULL, 0); 916 917 return (u4) adler32(adler, start, end - start); 918 } 919 920 921 /* 922 * Compute the size, in bytes, of a DexCode. 923 */ 924 size_t dexGetDexCodeSize(const DexCode* pCode) 925 { 926 /* 927 * The catch handler data is the last entry. It has a variable number 928 * of variable-size pieces, so we need to create an iterator. 929 */ 930 u4 handlersSize; 931 u4 offset; 932 u4 ui; 933 934 if (pCode->triesSize != 0) { 935 handlersSize = dexGetHandlersSize(pCode); 936 offset = dexGetFirstHandlerOffset(pCode); 937 } else { 938 handlersSize = 0; 939 offset = 0; 940 } 941 942 for (ui = 0; ui < handlersSize; ui++) { 943 DexCatchIterator iterator; 944 dexCatchIteratorInit(&iterator, pCode, offset); 945 offset = dexCatchIteratorGetEndOffset(&iterator, pCode); 946 } 947 948 const u1* handlerData = dexGetCatchHandlerData(pCode); 949 950 //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", 951 // pCode, handlerData, offset); 952 953 /* return the size of the catch handler + everything before it */ 954 return (handlerData - (u1*) pCode) + offset; 955 } 956 957 958 /* 959 * =========================================================================== 960 * Debug info 961 * =========================================================================== 962 */ 963 964 /* 965 * Decode the arguments in a method signature, which looks something 966 * like "(ID[Ljava/lang/String;)V". 967 * 968 * Returns the type signature letter for the next argument, or ')' if 969 * there are no more args. Advances "pSig" to point to the character 970 * after the one returned. 971 */ 972 static char decodeSignature(const char** pSig) 973 { 974 const char* sig = *pSig; 975 976 if (*sig == '(') 977 sig++; 978 979 if (*sig == 'L') { 980 /* object ref */ 981 while (*++sig != ';') 982 ; 983 *pSig = sig+1; 984 return 'L'; 985 } 986 if (*sig == '[') { 987 /* array; advance past array type */ 988 while (*++sig == '[') 989 ; 990 if (*sig == 'L') { 991 while (*++sig != ';') 992 ; 993 } 994 *pSig = sig+1; 995 return '['; 996 } 997 if (*sig == '\0') 998 return *sig; /* don't advance further */ 999 1000 *pSig = sig+1; 1001 return *sig; 1002 } 1003 1004 /* 1005 * returns the length of a type string, given the start of the 1006 * type string. Used for the case where the debug info format 1007 * references types that are inside a method type signature. 1008 */ 1009 static int typeLength (const char *type) { 1010 // Assumes any leading '(' has already been gobbled 1011 const char *end = type; 1012 decodeSignature(&end); 1013 return end - type; 1014 } 1015 1016 /* 1017 * Reads a string index as encoded for the debug info format, 1018 * returning a string pointer or NULL as appropriate. 1019 */ 1020 static const char* readStringIdx(const DexFile* pDexFile, 1021 const u1** pStream) { 1022 u4 stringIdx = readUnsignedLeb128(pStream); 1023 1024 // Remember, encoded string indicies have 1 added to them. 1025 if (stringIdx == 0) { 1026 return NULL; 1027 } else { 1028 return dexStringById(pDexFile, stringIdx - 1); 1029 } 1030 } 1031 1032 /* 1033 * Reads a type index as encoded for the debug info format, returning 1034 * a string pointer for its descriptor or NULL as appropriate. 1035 */ 1036 static const char* readTypeIdx(const DexFile* pDexFile, 1037 const u1** pStream) { 1038 u4 typeIdx = readUnsignedLeb128(pStream); 1039 1040 // Remember, encoded type indicies have 1 added to them. 1041 if (typeIdx == 0) { 1042 return NULL; 1043 } else { 1044 return dexStringByTypeIdx(pDexFile, typeIdx - 1); 1045 } 1046 } 1047 1048 /* access_flag value indicating that a method is static */ 1049 #define ACC_STATIC 0x0008 1050 1051 typedef struct LocalInfo { 1052 const char *name; 1053 const char *descriptor; 1054 const char *signature; 1055 u2 startAddress; 1056 bool live; 1057 } LocalInfo; 1058 1059 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, 1060 LocalInfo *localInReg, DexDebugNewLocalCb localCb) 1061 { 1062 if (localCb != NULL && localInReg[reg].live) { 1063 localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, 1064 localInReg[reg].name, 1065 localInReg[reg].descriptor, 1066 localInReg[reg].signature == NULL 1067 ? "" : localInReg[reg].signature ); 1068 } 1069 } 1070 1071 // TODO optimize localCb == NULL case 1072 void dexDecodeDebugInfo( 1073 const DexFile* pDexFile, 1074 const DexCode* pCode, 1075 const char* classDescriptor, 1076 u4 protoIdx, 1077 u4 accessFlags, 1078 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, 1079 void* cnxt) 1080 { 1081 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); 1082 u4 line; 1083 u4 parametersSize; 1084 u4 address = 0; 1085 LocalInfo localInReg[pCode->registersSize]; 1086 u4 insnsSize = pCode->insnsSize; 1087 DexProto proto = { pDexFile, protoIdx }; 1088 1089 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); 1090 1091 if (stream == NULL) { 1092 goto end; 1093 } 1094 1095 line = readUnsignedLeb128(&stream); 1096 parametersSize = readUnsignedLeb128(&stream); 1097 1098 u2 argReg = pCode->registersSize - pCode->insSize; 1099 1100 if ((accessFlags & ACC_STATIC) == 0) { 1101 /* 1102 * The code is an instance method, which means that there is 1103 * an initial this parameter. Also, the proto list should 1104 * contain exactly one fewer argument word than the insSize 1105 * indicates. 1106 */ 1107 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); 1108 localInReg[argReg].name = "this"; 1109 localInReg[argReg].descriptor = classDescriptor; 1110 localInReg[argReg].startAddress = 0; 1111 localInReg[argReg].live = true; 1112 argReg++; 1113 } else { 1114 assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); 1115 } 1116 1117 DexParameterIterator iterator; 1118 dexParameterIteratorInit(&iterator, &proto); 1119 1120 while (parametersSize-- != 0) { 1121 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); 1122 const char *name; 1123 int reg; 1124 1125 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { 1126 goto invalid_stream; 1127 } 1128 1129 name = readStringIdx(pDexFile, &stream); 1130 reg = argReg; 1131 1132 switch (descriptor[0]) { 1133 case 'D': 1134 case 'J': 1135 argReg += 2; 1136 break; 1137 default: 1138 argReg += 1; 1139 break; 1140 } 1141 1142 if (name != NULL) { 1143 localInReg[reg].name = name; 1144 localInReg[reg].descriptor = descriptor; 1145 localInReg[reg].signature = NULL; 1146 localInReg[reg].startAddress = address; 1147 localInReg[reg].live = true; 1148 } 1149 } 1150 1151 for (;;) { 1152 u1 opcode = *stream++; 1153 u2 reg; 1154 1155 switch (opcode) { 1156 case DBG_END_SEQUENCE: 1157 goto end; 1158 1159 case DBG_ADVANCE_PC: 1160 address += readUnsignedLeb128(&stream); 1161 break; 1162 1163 case DBG_ADVANCE_LINE: 1164 line += readSignedLeb128(&stream); 1165 break; 1166 1167 case DBG_START_LOCAL: 1168 case DBG_START_LOCAL_EXTENDED: 1169 reg = readUnsignedLeb128(&stream); 1170 if (reg > pCode->registersSize) goto invalid_stream; 1171 1172 // Emit what was previously there, if anything 1173 emitLocalCbIfLive (cnxt, reg, address, 1174 localInReg, localCb); 1175 1176 localInReg[reg].name = readStringIdx(pDexFile, &stream); 1177 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); 1178 if (opcode == DBG_START_LOCAL_EXTENDED) { 1179 localInReg[reg].signature 1180 = readStringIdx(pDexFile, &stream); 1181 } else { 1182 localInReg[reg].signature = NULL; 1183 } 1184 localInReg[reg].startAddress = address; 1185 localInReg[reg].live = true; 1186 break; 1187 1188 case DBG_END_LOCAL: 1189 reg = readUnsignedLeb128(&stream); 1190 if (reg > pCode->registersSize) goto invalid_stream; 1191 1192 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); 1193 localInReg[reg].live = false; 1194 break; 1195 1196 case DBG_RESTART_LOCAL: 1197 reg = readUnsignedLeb128(&stream); 1198 if (reg > pCode->registersSize) goto invalid_stream; 1199 1200 if (localInReg[reg].name == NULL 1201 || localInReg[reg].descriptor == NULL) { 1202 goto invalid_stream; 1203 } 1204 1205 /* 1206 * If the register is live, the "restart" is superfluous, 1207 * and we don't want to mess with the existing start address. 1208 */ 1209 if (!localInReg[reg].live) { 1210 localInReg[reg].startAddress = address; 1211 localInReg[reg].live = true; 1212 } 1213 break; 1214 1215 case DBG_SET_PROLOGUE_END: 1216 case DBG_SET_EPILOGUE_BEGIN: 1217 case DBG_SET_FILE: 1218 break; 1219 1220 default: { 1221 int adjopcode = opcode - DBG_FIRST_SPECIAL; 1222 1223 address += adjopcode / DBG_LINE_RANGE; 1224 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); 1225 1226 if (posCb != NULL) { 1227 int done; 1228 done = posCb(cnxt, address, line); 1229 1230 if (done) { 1231 // early exit 1232 goto end; 1233 } 1234 } 1235 break; 1236 } 1237 } 1238 } 1239 1240 end: 1241 { 1242 int reg; 1243 for (reg = 0; reg < pCode->registersSize; reg++) { 1244 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); 1245 } 1246 } 1247 return; 1248 1249 invalid_stream: 1250 IF_LOGE() { 1251 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); 1252 LOGE("Invalid debug info stream. class %s; proto %s", 1253 classDescriptor, methodDescriptor); 1254 free(methodDescriptor); 1255 } 1256 } 1257 1258