1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Access the contents of a .dex file. 19 */ 20 21 #include "DexFile.h" 22 #include "DexOptData.h" 23 #include "DexProto.h" 24 #include "DexCatch.h" 25 #include "Leb128.h" 26 #include "sha1.h" 27 #include "ZipArchive.h" 28 29 #include <zlib.h> 30 31 #include <stdlib.h> 32 #include <stddef.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <errno.h> 36 37 38 /* 39 * Verifying checksums is good, but it slows things down and causes us to 40 * touch every page. In the "optimized" world, it doesn't work at all, 41 * because we rewrite the contents. 42 */ 43 static const bool kVerifyChecksum = false; 44 static const bool kVerifySignature = false; 45 46 47 /* Compare two '\0'-terminated modified UTF-8 strings, using Unicode 48 * code point values for comparison. This treats different encodings 49 * for the same code point as equivalent, except that only a real '\0' 50 * byte is considered the string terminator. The return value is as 51 * for strcmp(). */ 52 int dexUtf8Cmp(const char* s1, const char* s2) { 53 for (;;) { 54 if (*s1 == '\0') { 55 if (*s2 == '\0') { 56 return 0; 57 } 58 return -1; 59 } else if (*s2 == '\0') { 60 return 1; 61 } 62 63 int utf1 = dexGetUtf16FromUtf8(&s1); 64 int utf2 = dexGetUtf16FromUtf8(&s2); 65 int diff = utf1 - utf2; 66 67 if (diff != 0) { 68 return diff; 69 } 70 } 71 } 72 73 /* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ 74 u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { 75 0x00000000, // 00..1f low control characters; nothing valid 76 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' 77 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' 78 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' 79 }; 80 81 /* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ 82 bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { 83 /* 84 * It's a multibyte encoded character. Decode it and analyze. We 85 * accept anything that isn't (a) an improperly encoded low value, 86 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high 87 * control character, or (e) a high space, layout, or special 88 * character (U+00a0, U+2000..U+200f, U+2028..U+202f, 89 * U+fff0..U+ffff). 90 */ 91 92 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 93 94 // Perform follow-up tests based on the high 8 bits. 95 switch (utf16 >> 8) { 96 case 0x00: { 97 // It's only valid if it's above the ISO-8859-1 high space (0xa0). 98 return (utf16 > 0x00a0); 99 } 100 case 0xd8: 101 case 0xd9: 102 case 0xda: 103 case 0xdb: { 104 /* 105 * It's a leading surrogate. Check to see that a trailing 106 * surrogate follows. 107 */ 108 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 109 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); 110 } 111 case 0xdc: 112 case 0xdd: 113 case 0xde: 114 case 0xdf: { 115 // It's a trailing surrogate, which is not valid at this point. 116 return false; 117 } 118 case 0x20: 119 case 0xff: { 120 // It's in the range that has spaces, controls, and specials. 121 switch (utf16 & 0xfff8) { 122 case 0x2000: 123 case 0x2008: 124 case 0x2028: 125 case 0xfff0: 126 case 0xfff8: { 127 return false; 128 } 129 } 130 break; 131 } 132 } 133 134 return true; 135 } 136 137 /* Return whether the given string is a valid field or method name. */ 138 bool dexIsValidMemberName(const char* s) { 139 bool angleName = false; 140 141 switch (*s) { 142 case '\0': { 143 // The empty string is not a valid name. 144 return false; 145 } 146 case '<': { 147 /* 148 * '<' is allowed only at the start of a name, and if present, 149 * means that the name must end with '>'. 150 */ 151 angleName = true; 152 s++; 153 break; 154 } 155 } 156 157 for (;;) { 158 switch (*s) { 159 case '\0': { 160 return !angleName; 161 } 162 case '>': { 163 return angleName && s[1] == '\0'; 164 } 165 } 166 if (!dexIsValidMemberNameUtf8(&s)) { 167 return false; 168 } 169 } 170 } 171 172 /* Return whether the given string is a valid type descriptor. */ 173 bool dexIsValidTypeDescriptor(const char* s) { 174 int arrayCount = 0; 175 176 while (*s == '[') { 177 arrayCount++; 178 s++; 179 } 180 181 if (arrayCount > 255) { 182 // Arrays may have no more than 255 dimensions. 183 return false; 184 } 185 186 switch (*(s++)) { 187 case 'B': 188 case 'C': 189 case 'D': 190 case 'F': 191 case 'I': 192 case 'J': 193 case 'S': 194 case 'Z': { 195 // These are all single-character descriptors for primitive types. 196 return (*s == '\0'); 197 } 198 case 'V': { 199 // You can't have an array of void. 200 return (arrayCount == 0) && (*s == '\0'); 201 } 202 case 'L': { 203 // Break out and continue below. 204 break; 205 } 206 default: { 207 // Oddball descriptor character. 208 return false; 209 } 210 } 211 212 // We just consumed the 'L' that introduces a class name. 213 214 bool slashOrFirst = true; // first character or just encountered a slash 215 for (;;) { 216 u1 c = (u1) *s; 217 switch (c) { 218 case '\0': { 219 // Premature end. 220 return false; 221 } 222 case ';': { 223 /* 224 * Make sure that this is the end of the string and that 225 * it doesn't end with an empty component (including the 226 * degenerate case of "L;"). 227 */ 228 return (s[1] == '\0') && !slashOrFirst; 229 } 230 case '/': { 231 if (slashOrFirst) { 232 // Slash at start or two slashes in a row. 233 return false; 234 } 235 slashOrFirst = true; 236 s++; 237 break; 238 } 239 default: { 240 if (!dexIsValidMemberNameUtf8(&s)) { 241 return false; 242 } 243 slashOrFirst = false; 244 break; 245 } 246 } 247 } 248 } 249 250 /* Return whether the given string is a valid reference descriptor. This 251 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 252 * is for a class or array and not a primitive type. */ 253 bool dexIsReferenceDescriptor(const char* s) { 254 if (!dexIsValidTypeDescriptor(s)) { 255 return false; 256 } 257 258 return (s[0] == 'L') || (s[0] == '['); 259 } 260 261 /* Return whether the given string is a valid class descriptor. This 262 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 263 * is for a class and not an array or primitive type. */ 264 bool dexIsClassDescriptor(const char* s) { 265 if (!dexIsValidTypeDescriptor(s)) { 266 return false; 267 } 268 269 return s[0] == 'L'; 270 } 271 272 /* Return whether the given string is a valid field type descriptor. This 273 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 274 * is for anything but "void". */ 275 bool dexIsFieldDescriptor(const char* s) { 276 if (!dexIsValidTypeDescriptor(s)) { 277 return false; 278 } 279 280 return s[0] != 'V'; 281 } 282 283 /* Return the UTF-8 encoded string with the specified string_id index, 284 * also filling in the UTF-16 size (number of 16-bit code points).*/ 285 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 286 u4* utf16Size) { 287 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 288 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 289 290 *utf16Size = readUnsignedLeb128(&ptr); 291 return (const char*) ptr; 292 } 293 294 /* 295 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 296 * least kSHA1DigestOutputLen bytes. 297 */ 298 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 299 300 /* 301 * Compute a SHA-1 digest on a range of bytes. 302 */ 303 static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 304 unsigned char digest[]) 305 { 306 SHA1_CTX context; 307 SHA1Init(&context); 308 SHA1Update(&context, data, length); 309 SHA1Final(digest, &context); 310 } 311 312 /* 313 * Format the SHA-1 digest into the buffer, which must be able to hold at 314 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 315 */ 316 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 317 { 318 static const char hexDigit[] = "0123456789abcdef"; 319 char* cp; 320 int i; 321 322 cp = tmpBuf; 323 for (i = 0; i < kSHA1DigestLen; i++) { 324 *cp++ = hexDigit[digest[i] >> 4]; 325 *cp++ = hexDigit[digest[i] & 0x0f]; 326 } 327 *cp++ = '\0'; 328 329 assert(cp == tmpBuf + kSHA1DigestOutputLen); 330 331 return tmpBuf; 332 } 333 334 /* 335 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 336 * 337 * This may or may not be compatible with UTF-8 hash functions used inside 338 * the Dalvik VM. 339 * 340 * The basic "multiply by 31 and add" approach does better on class names 341 * than most other things tried (e.g. adler32). 342 */ 343 static u4 classDescriptorHash(const char* str) 344 { 345 u4 hash = 1; 346 347 while (*str != '\0') 348 hash = hash * 31 + *str++; 349 350 return hash; 351 } 352 353 /* 354 * Add an entry to the class lookup table. We hash the string and probe 355 * until we find an open slot. 356 */ 357 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 358 int stringOff, int classDefOff, int* pNumProbes) 359 { 360 const char* classDescriptor = 361 (const char*) (pDexFile->baseAddr + stringOff); 362 const DexClassDef* pClassDef = 363 (const DexClassDef*) (pDexFile->baseAddr + classDefOff); 364 u4 hash = classDescriptorHash(classDescriptor); 365 int mask = pLookup->numEntries-1; 366 int idx = hash & mask; 367 368 /* 369 * Find the first empty slot. We oversized the table, so this is 370 * guaranteed to finish. 371 */ 372 int probes = 0; 373 while (pLookup->table[idx].classDescriptorOffset != 0) { 374 idx = (idx + 1) & mask; 375 probes++; 376 } 377 //if (probes > 1) 378 // LOGW("classLookupAdd: probes=%d\n", probes); 379 380 pLookup->table[idx].classDescriptorHash = hash; 381 pLookup->table[idx].classDescriptorOffset = stringOff; 382 pLookup->table[idx].classDefOffset = classDefOff; 383 *pNumProbes = probes; 384 } 385 386 /* 387 * Round up to the next highest power of 2. 388 * 389 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 390 */ 391 u4 dexRoundUpPower2(u4 val) 392 { 393 val--; 394 val |= val >> 1; 395 val |= val >> 2; 396 val |= val >> 4; 397 val |= val >> 8; 398 val |= val >> 16; 399 val++; 400 401 return val; 402 } 403 404 /* 405 * Create the class lookup hash table. 406 * 407 * Returns newly-allocated storage. 408 */ 409 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 410 { 411 DexClassLookup* pLookup; 412 int allocSize; 413 int i, numEntries; 414 int numProbes, totalProbes, maxProbes; 415 416 numProbes = totalProbes = maxProbes = 0; 417 418 assert(pDexFile != NULL); 419 420 /* 421 * Using a factor of 3 results in far less probing than a factor of 2, 422 * but almost doubles the flash storage requirements for the bootstrap 423 * DEX files. The overall impact on class loading performance seems 424 * to be minor. We could probably get some performance improvement by 425 * using a secondary hash. 426 */ 427 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 428 allocSize = offsetof(DexClassLookup, table) 429 + numEntries * sizeof(pLookup->table[0]); 430 431 pLookup = (DexClassLookup*) calloc(1, allocSize); 432 if (pLookup == NULL) 433 return NULL; 434 pLookup->size = allocSize; 435 pLookup->numEntries = numEntries; 436 437 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 438 const DexClassDef* pClassDef; 439 const char* pString; 440 441 pClassDef = dexGetClassDef(pDexFile, i); 442 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 443 444 classLookupAdd(pDexFile, pLookup, 445 (u1*)pString - pDexFile->baseAddr, 446 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 447 448 if (numProbes > maxProbes) 449 maxProbes = numProbes; 450 totalProbes += numProbes; 451 } 452 453 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 454 " total=%d max=%d\n", 455 pDexFile->pHeader->classDefsSize, numEntries, 456 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 457 allocSize, totalProbes, maxProbes); 458 459 return pLookup; 460 } 461 462 463 /* 464 * Set up the basic raw data pointers of a DexFile. This function isn't 465 * meant for general use. 466 */ 467 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 468 DexHeader *pHeader = (DexHeader*) data; 469 470 pDexFile->baseAddr = data; 471 pDexFile->pHeader = pHeader; 472 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 473 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 474 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 475 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 476 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 477 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 478 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 479 } 480 481 /* 482 * Parse an optimized or unoptimized .dex file sitting in memory. This is 483 * called after the byte-ordering and structure alignment has been fixed up. 484 * 485 * On success, return a newly-allocated DexFile. 486 */ 487 DexFile* dexFileParse(const u1* data, size_t length, int flags) 488 { 489 DexFile* pDexFile = NULL; 490 const DexHeader* pHeader; 491 const u1* magic; 492 int result = -1; 493 494 if (length < sizeof(DexHeader)) { 495 LOGE("too short to be a valid .dex\n"); 496 goto bail; /* bad file format */ 497 } 498 499 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 500 if (pDexFile == NULL) 501 goto bail; /* alloc failure */ 502 memset(pDexFile, 0, sizeof(DexFile)); 503 504 /* 505 * Peel off the optimized header. 506 */ 507 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 508 magic = data; 509 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 510 LOGE("bad opt version (0x%02x %02x %02x %02x)\n", 511 magic[4], magic[5], magic[6], magic[7]); 512 goto bail; 513 } 514 515 pDexFile->pOptHeader = (const DexOptHeader*) data; 516 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", 517 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 518 519 /* parse the optimized dex file tables */ 520 if (!dexParseOptData(data, length, pDexFile)) 521 goto bail; 522 523 /* ignore the opt header and appended data from here on out */ 524 data += pDexFile->pOptHeader->dexOffset; 525 length -= pDexFile->pOptHeader->dexOffset; 526 if (pDexFile->pOptHeader->dexLength > length) { 527 LOGE("File truncated? stored len=%d, rem len=%d\n", 528 pDexFile->pOptHeader->dexLength, (int) length); 529 goto bail; 530 } 531 length = pDexFile->pOptHeader->dexLength; 532 } 533 534 dexFileSetupBasicPointers(pDexFile, data); 535 pHeader = pDexFile->pHeader; 536 537 magic = pHeader->magic; 538 if (memcmp(magic, DEX_MAGIC, 4) != 0) { 539 /* not expected */ 540 LOGE("bad magic number (0x%02x %02x %02x %02x)\n", 541 magic[0], magic[1], magic[2], magic[3]); 542 goto bail; 543 } 544 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { 545 LOGE("bad dex version (0x%02x %02x %02x %02x)\n", 546 magic[4], magic[5], magic[6], magic[7]); 547 goto bail; 548 } 549 550 /* 551 * Verify the checksum(s). This is reasonably quick, but does require 552 * touching every byte in the DEX file. The base checksum changes after 553 * byte-swapping and DEX optimization. 554 */ 555 if (flags & kDexParseVerifyChecksum) { 556 u4 adler = dexComputeChecksum(pHeader); 557 if (adler != pHeader->checksum) { 558 LOGE("ERROR: bad checksum (%08x vs %08x)\n", 559 adler, pHeader->checksum); 560 if (!(flags & kDexParseContinueOnError)) 561 goto bail; 562 } else { 563 LOGV("+++ adler32 checksum (%08x) verified\n", adler); 564 } 565 566 const DexOptHeader* pOptHeader = pDexFile->pOptHeader; 567 if (pOptHeader != NULL) { 568 adler = dexComputeOptChecksum(pOptHeader); 569 if (adler != pOptHeader->checksum) { 570 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n", 571 adler, pOptHeader->checksum); 572 if (!(flags & kDexParseContinueOnError)) 573 goto bail; 574 } else { 575 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler); 576 } 577 } 578 } 579 580 /* 581 * Verify the SHA-1 digest. (Normally we don't want to do this -- 582 * the digest is used to uniquely identify the original DEX file, and 583 * can't be computed for verification after the DEX is byte-swapped 584 * and optimized.) 585 */ 586 if (kVerifySignature) { 587 unsigned char sha1Digest[kSHA1DigestLen]; 588 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 589 kSHA1DigestLen; 590 591 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 592 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 593 char tmpBuf1[kSHA1DigestOutputLen]; 594 char tmpBuf2[kSHA1DigestOutputLen]; 595 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", 596 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 597 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 598 if (!(flags & kDexParseContinueOnError)) 599 goto bail; 600 } else { 601 LOGV("+++ sha1 digest verified\n"); 602 } 603 } 604 605 if (pHeader->fileSize != length) { 606 LOGE("ERROR: stored file size (%d) != expected (%d)\n", 607 (int) pHeader->fileSize, (int) length); 608 if (!(flags & kDexParseContinueOnError)) 609 goto bail; 610 } 611 612 if (pHeader->classDefsSize == 0) { 613 LOGE("ERROR: DEX file has no classes in it, failing\n"); 614 goto bail; 615 } 616 617 /* 618 * Success! 619 */ 620 result = 0; 621 622 bail: 623 if (result != 0 && pDexFile != NULL) { 624 dexFileFree(pDexFile); 625 pDexFile = NULL; 626 } 627 return pDexFile; 628 } 629 630 /* 631 * Free up the DexFile and any associated data structures. 632 * 633 * Note we may be called with a partially-initialized DexFile. 634 */ 635 void dexFileFree(DexFile* pDexFile) 636 { 637 if (pDexFile == NULL) 638 return; 639 640 free(pDexFile); 641 } 642 643 /* 644 * Look up a class definition entry by descriptor. 645 * 646 * "descriptor" should look like "Landroid/debug/Stuff;". 647 */ 648 const DexClassDef* dexFindClass(const DexFile* pDexFile, 649 const char* descriptor) 650 { 651 const DexClassLookup* pLookup = pDexFile->pClassLookup; 652 u4 hash; 653 int idx, mask; 654 655 hash = classDescriptorHash(descriptor); 656 mask = pLookup->numEntries - 1; 657 idx = hash & mask; 658 659 /* 660 * Search until we find a matching entry or an empty slot. 661 */ 662 while (true) { 663 int offset; 664 665 offset = pLookup->table[idx].classDescriptorOffset; 666 if (offset == 0) 667 return NULL; 668 669 if (pLookup->table[idx].classDescriptorHash == hash) { 670 const char* str; 671 672 str = (const char*) (pDexFile->baseAddr + offset); 673 if (strcmp(str, descriptor) == 0) { 674 return (const DexClassDef*) 675 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 676 } 677 } 678 679 idx = (idx + 1) & mask; 680 } 681 } 682 683 684 /* 685 * Compute the DEX file checksum for a memory-mapped DEX file. 686 */ 687 u4 dexComputeChecksum(const DexHeader* pHeader) 688 { 689 const u1* start = (const u1*) pHeader; 690 691 uLong adler = adler32(0L, Z_NULL, 0); 692 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 693 694 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 695 } 696 697 /* 698 * Compute the size, in bytes, of a DexCode. 699 */ 700 size_t dexGetDexCodeSize(const DexCode* pCode) 701 { 702 /* 703 * The catch handler data is the last entry. It has a variable number 704 * of variable-size pieces, so we need to create an iterator. 705 */ 706 u4 handlersSize; 707 u4 offset; 708 u4 ui; 709 710 if (pCode->triesSize != 0) { 711 handlersSize = dexGetHandlersSize(pCode); 712 offset = dexGetFirstHandlerOffset(pCode); 713 } else { 714 handlersSize = 0; 715 offset = 0; 716 } 717 718 for (ui = 0; ui < handlersSize; ui++) { 719 DexCatchIterator iterator; 720 dexCatchIteratorInit(&iterator, pCode, offset); 721 offset = dexCatchIteratorGetEndOffset(&iterator, pCode); 722 } 723 724 const u1* handlerData = dexGetCatchHandlerData(pCode); 725 726 //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", 727 // pCode, handlerData, offset); 728 729 /* return the size of the catch handler + everything before it */ 730 return (handlerData - (u1*) pCode) + offset; 731 } 732 733 734 /* 735 * =========================================================================== 736 * Debug info 737 * =========================================================================== 738 */ 739 740 /* 741 * Decode the arguments in a method signature, which looks something 742 * like "(ID[Ljava/lang/String;)V". 743 * 744 * Returns the type signature letter for the next argument, or ')' if 745 * there are no more args. Advances "pSig" to point to the character 746 * after the one returned. 747 */ 748 static char decodeSignature(const char** pSig) 749 { 750 const char* sig = *pSig; 751 752 if (*sig == '(') 753 sig++; 754 755 if (*sig == 'L') { 756 /* object ref */ 757 while (*++sig != ';') 758 ; 759 *pSig = sig+1; 760 return 'L'; 761 } 762 if (*sig == '[') { 763 /* array; advance past array type */ 764 while (*++sig == '[') 765 ; 766 if (*sig == 'L') { 767 while (*++sig != ';') 768 ; 769 } 770 *pSig = sig+1; 771 return '['; 772 } 773 if (*sig == '\0') 774 return *sig; /* don't advance further */ 775 776 *pSig = sig+1; 777 return *sig; 778 } 779 780 /* 781 * returns the length of a type string, given the start of the 782 * type string. Used for the case where the debug info format 783 * references types that are inside a method type signature. 784 */ 785 static int typeLength (const char *type) { 786 // Assumes any leading '(' has already been gobbled 787 const char *end = type; 788 decodeSignature(&end); 789 return end - type; 790 } 791 792 /* 793 * Reads a string index as encoded for the debug info format, 794 * returning a string pointer or NULL as appropriate. 795 */ 796 static const char* readStringIdx(const DexFile* pDexFile, 797 const u1** pStream) { 798 u4 stringIdx = readUnsignedLeb128(pStream); 799 800 // Remember, encoded string indicies have 1 added to them. 801 if (stringIdx == 0) { 802 return NULL; 803 } else { 804 return dexStringById(pDexFile, stringIdx - 1); 805 } 806 } 807 808 /* 809 * Reads a type index as encoded for the debug info format, returning 810 * a string pointer for its descriptor or NULL as appropriate. 811 */ 812 static const char* readTypeIdx(const DexFile* pDexFile, 813 const u1** pStream) { 814 u4 typeIdx = readUnsignedLeb128(pStream); 815 816 // Remember, encoded type indicies have 1 added to them. 817 if (typeIdx == 0) { 818 return NULL; 819 } else { 820 return dexStringByTypeIdx(pDexFile, typeIdx - 1); 821 } 822 } 823 824 /* access_flag value indicating that a method is static */ 825 #define ACC_STATIC 0x0008 826 827 typedef struct LocalInfo { 828 const char *name; 829 const char *descriptor; 830 const char *signature; 831 u2 startAddress; 832 bool live; 833 } LocalInfo; 834 835 static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, 836 LocalInfo *localInReg, DexDebugNewLocalCb localCb) 837 { 838 if (localCb != NULL && localInReg[reg].live) { 839 localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, 840 localInReg[reg].name, 841 localInReg[reg].descriptor, 842 localInReg[reg].signature == NULL 843 ? "" : localInReg[reg].signature ); 844 } 845 } 846 847 // TODO optimize localCb == NULL case 848 void dexDecodeDebugInfo( 849 const DexFile* pDexFile, 850 const DexCode* pCode, 851 const char* classDescriptor, 852 u4 protoIdx, 853 u4 accessFlags, 854 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, 855 void* cnxt) 856 { 857 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); 858 u4 line; 859 u4 parametersSize; 860 u4 address = 0; 861 LocalInfo localInReg[pCode->registersSize]; 862 u4 insnsSize = pCode->insnsSize; 863 DexProto proto = { pDexFile, protoIdx }; 864 865 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); 866 867 if (stream == NULL) { 868 goto end; 869 } 870 871 line = readUnsignedLeb128(&stream); 872 parametersSize = readUnsignedLeb128(&stream); 873 874 u2 argReg = pCode->registersSize - pCode->insSize; 875 876 if ((accessFlags & ACC_STATIC) == 0) { 877 /* 878 * The code is an instance method, which means that there is 879 * an initial this parameter. Also, the proto list should 880 * contain exactly one fewer argument word than the insSize 881 * indicates. 882 */ 883 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); 884 localInReg[argReg].name = "this"; 885 localInReg[argReg].descriptor = classDescriptor; 886 localInReg[argReg].startAddress = 0; 887 localInReg[argReg].live = true; 888 argReg++; 889 } else { 890 assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); 891 } 892 893 DexParameterIterator iterator; 894 dexParameterIteratorInit(&iterator, &proto); 895 896 while (parametersSize-- != 0) { 897 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); 898 const char *name; 899 int reg; 900 901 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { 902 goto invalid_stream; 903 } 904 905 name = readStringIdx(pDexFile, &stream); 906 reg = argReg; 907 908 switch (descriptor[0]) { 909 case 'D': 910 case 'J': 911 argReg += 2; 912 break; 913 default: 914 argReg += 1; 915 break; 916 } 917 918 if (name != NULL) { 919 localInReg[reg].name = name; 920 localInReg[reg].descriptor = descriptor; 921 localInReg[reg].signature = NULL; 922 localInReg[reg].startAddress = address; 923 localInReg[reg].live = true; 924 } 925 } 926 927 for (;;) { 928 u1 opcode = *stream++; 929 u2 reg; 930 931 switch (opcode) { 932 case DBG_END_SEQUENCE: 933 goto end; 934 935 case DBG_ADVANCE_PC: 936 address += readUnsignedLeb128(&stream); 937 break; 938 939 case DBG_ADVANCE_LINE: 940 line += readSignedLeb128(&stream); 941 break; 942 943 case DBG_START_LOCAL: 944 case DBG_START_LOCAL_EXTENDED: 945 reg = readUnsignedLeb128(&stream); 946 if (reg > pCode->registersSize) goto invalid_stream; 947 948 // Emit what was previously there, if anything 949 emitLocalCbIfLive (cnxt, reg, address, 950 localInReg, localCb); 951 952 localInReg[reg].name = readStringIdx(pDexFile, &stream); 953 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); 954 if (opcode == DBG_START_LOCAL_EXTENDED) { 955 localInReg[reg].signature 956 = readStringIdx(pDexFile, &stream); 957 } else { 958 localInReg[reg].signature = NULL; 959 } 960 localInReg[reg].startAddress = address; 961 localInReg[reg].live = true; 962 break; 963 964 case DBG_END_LOCAL: 965 reg = readUnsignedLeb128(&stream); 966 if (reg > pCode->registersSize) goto invalid_stream; 967 968 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); 969 localInReg[reg].live = false; 970 break; 971 972 case DBG_RESTART_LOCAL: 973 reg = readUnsignedLeb128(&stream); 974 if (reg > pCode->registersSize) goto invalid_stream; 975 976 if (localInReg[reg].name == NULL 977 || localInReg[reg].descriptor == NULL) { 978 goto invalid_stream; 979 } 980 981 /* 982 * If the register is live, the "restart" is superfluous, 983 * and we don't want to mess with the existing start address. 984 */ 985 if (!localInReg[reg].live) { 986 localInReg[reg].startAddress = address; 987 localInReg[reg].live = true; 988 } 989 break; 990 991 case DBG_SET_PROLOGUE_END: 992 case DBG_SET_EPILOGUE_BEGIN: 993 case DBG_SET_FILE: 994 break; 995 996 default: { 997 int adjopcode = opcode - DBG_FIRST_SPECIAL; 998 999 address += adjopcode / DBG_LINE_RANGE; 1000 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); 1001 1002 if (posCb != NULL) { 1003 int done; 1004 done = posCb(cnxt, address, line); 1005 1006 if (done) { 1007 // early exit 1008 goto end; 1009 } 1010 } 1011 break; 1012 } 1013 } 1014 } 1015 1016 end: 1017 { 1018 int reg; 1019 for (reg = 0; reg < pCode->registersSize; reg++) { 1020 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); 1021 } 1022 } 1023 return; 1024 1025 invalid_stream: 1026 IF_LOGE() { 1027 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); 1028 LOGE("Invalid debug info stream. class %s; proto %s", 1029 classDescriptor, methodDescriptor); 1030 free(methodDescriptor); 1031 } 1032 } 1033