1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Access the contents of a .dex file. 19 */ 20 21 #include "DexFile.h" 22 #include "DexOptData.h" 23 #include "DexProto.h" 24 #include "DexCatch.h" 25 #include "Leb128.h" 26 #include "sha1.h" 27 #include "ZipArchive.h" 28 29 #include <zlib.h> 30 31 #include <stdlib.h> 32 #include <stddef.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <errno.h> 36 37 38 /* 39 * Verifying checksums is good, but it slows things down and causes us to 40 * touch every page. In the "optimized" world, it doesn't work at all, 41 * because we rewrite the contents. 42 */ 43 static const bool kVerifySignature = false; 44 45 /* (documented in header) */ 46 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) { 47 const char* string = dexGetPrimitiveTypeDescriptor(type); 48 49 return (string == NULL) ? '\0' : string[0]; 50 } 51 52 /* (documented in header) */ 53 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) { 54 switch (type) { 55 case PRIM_VOID: return "V"; 56 case PRIM_BOOLEAN: return "Z"; 57 case PRIM_BYTE: return "B"; 58 case PRIM_SHORT: return "S"; 59 case PRIM_CHAR: return "C"; 60 case PRIM_INT: return "I"; 61 case PRIM_LONG: return "J"; 62 case PRIM_FLOAT: return "F"; 63 case PRIM_DOUBLE: return "D"; 64 default: return NULL; 65 } 66 67 return NULL; 68 } 69 70 /* (documented in header) */ 71 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) { 72 switch (type) { 73 case PRIM_VOID: return NULL; 74 case PRIM_BOOLEAN: return "Ljava/lang/Boolean;"; 75 case PRIM_BYTE: return "Ljava/lang/Byte;"; 76 case PRIM_SHORT: return "Ljava/lang/Short;"; 77 case PRIM_CHAR: return "Ljava/lang/Character;"; 78 case PRIM_INT: return "Ljava/lang/Integer;"; 79 case PRIM_LONG: return "Ljava/lang/Long;"; 80 case PRIM_FLOAT: return "Ljava/lang/Float;"; 81 case PRIM_DOUBLE: return "Ljava/lang/Double;"; 82 default: return NULL; 83 } 84 } 85 86 /* (documented in header) */ 87 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) { 88 switch (descriptorChar) { 89 case 'V': return PRIM_VOID; 90 case 'Z': return PRIM_BOOLEAN; 91 case 'B': return PRIM_BYTE; 92 case 'S': return PRIM_SHORT; 93 case 'C': return PRIM_CHAR; 94 case 'I': return PRIM_INT; 95 case 'J': return PRIM_LONG; 96 case 'F': return PRIM_FLOAT; 97 case 'D': return PRIM_DOUBLE; 98 default: return PRIM_NOT; 99 } 100 } 101 102 /* Return the UTF-8 encoded string with the specified string_id index, 103 * also filling in the UTF-16 size (number of 16-bit code points).*/ 104 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 105 u4* utf16Size) { 106 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 107 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 108 109 *utf16Size = readUnsignedLeb128(&ptr); 110 return (const char*) ptr; 111 } 112 113 /* 114 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 115 * least kSHA1DigestOutputLen bytes. 116 */ 117 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 118 119 /* 120 * Compute a SHA-1 digest on a range of bytes. 121 */ 122 static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 123 unsigned char digest[]) 124 { 125 SHA1_CTX context; 126 SHA1Init(&context); 127 SHA1Update(&context, data, length); 128 SHA1Final(digest, &context); 129 } 130 131 /* 132 * Format the SHA-1 digest into the buffer, which must be able to hold at 133 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 134 */ 135 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 136 { 137 static const char hexDigit[] = "0123456789abcdef"; 138 char* cp; 139 int i; 140 141 cp = tmpBuf; 142 for (i = 0; i < kSHA1DigestLen; i++) { 143 *cp++ = hexDigit[digest[i] >> 4]; 144 *cp++ = hexDigit[digest[i] & 0x0f]; 145 } 146 *cp++ = '\0'; 147 148 assert(cp == tmpBuf + kSHA1DigestOutputLen); 149 150 return tmpBuf; 151 } 152 153 /* 154 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 155 * 156 * This may or may not be compatible with UTF-8 hash functions used inside 157 * the Dalvik VM. 158 * 159 * The basic "multiply by 31 and add" approach does better on class names 160 * than most other things tried (e.g. adler32). 161 */ 162 static u4 classDescriptorHash(const char* str) 163 { 164 u4 hash = 1; 165 166 while (*str != '\0') 167 hash = hash * 31 + *str++; 168 169 return hash; 170 } 171 172 /* 173 * Add an entry to the class lookup table. We hash the string and probe 174 * until we find an open slot. 175 */ 176 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 177 int stringOff, int classDefOff, int* pNumProbes) 178 { 179 const char* classDescriptor = 180 (const char*) (pDexFile->baseAddr + stringOff); 181 u4 hash = classDescriptorHash(classDescriptor); 182 int mask = pLookup->numEntries-1; 183 int idx = hash & mask; 184 185 /* 186 * Find the first empty slot. We oversized the table, so this is 187 * guaranteed to finish. 188 */ 189 int probes = 0; 190 while (pLookup->table[idx].classDescriptorOffset != 0) { 191 idx = (idx + 1) & mask; 192 probes++; 193 } 194 //if (probes > 1) 195 // ALOGW("classLookupAdd: probes=%d", probes); 196 197 pLookup->table[idx].classDescriptorHash = hash; 198 pLookup->table[idx].classDescriptorOffset = stringOff; 199 pLookup->table[idx].classDefOffset = classDefOff; 200 *pNumProbes = probes; 201 } 202 203 /* 204 * Create the class lookup hash table. 205 * 206 * Returns newly-allocated storage. 207 */ 208 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 209 { 210 DexClassLookup* pLookup; 211 int allocSize; 212 int i, numEntries; 213 int numProbes, totalProbes, maxProbes; 214 215 numProbes = totalProbes = maxProbes = 0; 216 217 assert(pDexFile != NULL); 218 219 /* 220 * Using a factor of 3 results in far less probing than a factor of 2, 221 * but almost doubles the flash storage requirements for the bootstrap 222 * DEX files. The overall impact on class loading performance seems 223 * to be minor. We could probably get some performance improvement by 224 * using a secondary hash. 225 */ 226 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 227 allocSize = offsetof(DexClassLookup, table) 228 + numEntries * sizeof(pLookup->table[0]); 229 230 pLookup = (DexClassLookup*) calloc(1, allocSize); 231 if (pLookup == NULL) 232 return NULL; 233 pLookup->size = allocSize; 234 pLookup->numEntries = numEntries; 235 236 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 237 const DexClassDef* pClassDef; 238 const char* pString; 239 240 pClassDef = dexGetClassDef(pDexFile, i); 241 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 242 243 classLookupAdd(pDexFile, pLookup, 244 (u1*)pString - pDexFile->baseAddr, 245 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 246 247 if (numProbes > maxProbes) 248 maxProbes = numProbes; 249 totalProbes += numProbes; 250 } 251 252 ALOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 253 " total=%d max=%d", 254 pDexFile->pHeader->classDefsSize, numEntries, 255 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 256 allocSize, totalProbes, maxProbes); 257 258 return pLookup; 259 } 260 261 262 /* 263 * Set up the basic raw data pointers of a DexFile. This function isn't 264 * meant for general use. 265 */ 266 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 267 DexHeader *pHeader = (DexHeader*) data; 268 269 pDexFile->baseAddr = data; 270 pDexFile->pHeader = pHeader; 271 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 272 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 273 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 274 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 275 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 276 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 277 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 278 } 279 280 /* 281 * Parse an optimized or unoptimized .dex file sitting in memory. This is 282 * called after the byte-ordering and structure alignment has been fixed up. 283 * 284 * On success, return a newly-allocated DexFile. 285 */ 286 DexFile* dexFileParse(const u1* data, size_t length, int flags) 287 { 288 DexFile* pDexFile = NULL; 289 const DexHeader* pHeader; 290 const u1* magic; 291 int result = -1; 292 293 if (length < sizeof(DexHeader)) { 294 ALOGE("too short to be a valid .dex"); 295 goto bail; /* bad file format */ 296 } 297 298 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 299 if (pDexFile == NULL) 300 goto bail; /* alloc failure */ 301 memset(pDexFile, 0, sizeof(DexFile)); 302 303 /* 304 * Peel off the optimized header. 305 */ 306 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 307 magic = data; 308 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 309 ALOGE("bad opt version (0x%02x %02x %02x %02x)", 310 magic[4], magic[5], magic[6], magic[7]); 311 goto bail; 312 } 313 314 pDexFile->pOptHeader = (const DexOptHeader*) data; 315 ALOGV("Good opt header, DEX offset is %d, flags=0x%02x", 316 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 317 318 /* parse the optimized dex file tables */ 319 if (!dexParseOptData(data, length, pDexFile)) 320 goto bail; 321 322 /* ignore the opt header and appended data from here on out */ 323 data += pDexFile->pOptHeader->dexOffset; 324 length -= pDexFile->pOptHeader->dexOffset; 325 if (pDexFile->pOptHeader->dexLength > length) { 326 ALOGE("File truncated? stored len=%d, rem len=%d", 327 pDexFile->pOptHeader->dexLength, (int) length); 328 goto bail; 329 } 330 length = pDexFile->pOptHeader->dexLength; 331 } 332 333 dexFileSetupBasicPointers(pDexFile, data); 334 pHeader = pDexFile->pHeader; 335 336 if (!dexHasValidMagic(pHeader)) { 337 goto bail; 338 } 339 340 /* 341 * Verify the checksum(s). This is reasonably quick, but does require 342 * touching every byte in the DEX file. The base checksum changes after 343 * byte-swapping and DEX optimization. 344 */ 345 if (flags & kDexParseVerifyChecksum) { 346 u4 adler = dexComputeChecksum(pHeader); 347 if (adler != pHeader->checksum) { 348 ALOGE("ERROR: bad checksum (%08x vs %08x)", 349 adler, pHeader->checksum); 350 if (!(flags & kDexParseContinueOnError)) 351 goto bail; 352 } else { 353 ALOGV("+++ adler32 checksum (%08x) verified", adler); 354 } 355 356 const DexOptHeader* pOptHeader = pDexFile->pOptHeader; 357 if (pOptHeader != NULL) { 358 adler = dexComputeOptChecksum(pOptHeader); 359 if (adler != pOptHeader->checksum) { 360 ALOGE("ERROR: bad opt checksum (%08x vs %08x)", 361 adler, pOptHeader->checksum); 362 if (!(flags & kDexParseContinueOnError)) 363 goto bail; 364 } else { 365 ALOGV("+++ adler32 opt checksum (%08x) verified", adler); 366 } 367 } 368 } 369 370 /* 371 * Verify the SHA-1 digest. (Normally we don't want to do this -- 372 * the digest is used to uniquely identify the original DEX file, and 373 * can't be computed for verification after the DEX is byte-swapped 374 * and optimized.) 375 */ 376 if (kVerifySignature) { 377 unsigned char sha1Digest[kSHA1DigestLen]; 378 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 379 kSHA1DigestLen; 380 381 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 382 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 383 char tmpBuf1[kSHA1DigestOutputLen]; 384 char tmpBuf2[kSHA1DigestOutputLen]; 385 ALOGE("ERROR: bad SHA1 digest (%s vs %s)", 386 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 387 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 388 if (!(flags & kDexParseContinueOnError)) 389 goto bail; 390 } else { 391 ALOGV("+++ sha1 digest verified"); 392 } 393 } 394 395 if (pHeader->fileSize != length) { 396 ALOGE("ERROR: stored file size (%d) != expected (%d)", 397 (int) pHeader->fileSize, (int) length); 398 if (!(flags & kDexParseContinueOnError)) 399 goto bail; 400 } 401 402 if (pHeader->classDefsSize == 0) { 403 ALOGE("ERROR: DEX file has no classes in it, failing"); 404 goto bail; 405 } 406 407 /* 408 * Success! 409 */ 410 result = 0; 411 412 bail: 413 if (result != 0 && pDexFile != NULL) { 414 dexFileFree(pDexFile); 415 pDexFile = NULL; 416 } 417 return pDexFile; 418 } 419 420 /* 421 * Free up the DexFile and any associated data structures. 422 * 423 * Note we may be called with a partially-initialized DexFile. 424 */ 425 void dexFileFree(DexFile* pDexFile) 426 { 427 if (pDexFile == NULL) 428 return; 429 430 free(pDexFile); 431 } 432 433 /* 434 * Look up a class definition entry by descriptor. 435 * 436 * "descriptor" should look like "Landroid/debug/Stuff;". 437 */ 438 const DexClassDef* dexFindClass(const DexFile* pDexFile, 439 const char* descriptor) 440 { 441 const DexClassLookup* pLookup = pDexFile->pClassLookup; 442 u4 hash; 443 int idx, mask; 444 445 hash = classDescriptorHash(descriptor); 446 mask = pLookup->numEntries - 1; 447 idx = hash & mask; 448 449 /* 450 * Search until we find a matching entry or an empty slot. 451 */ 452 while (true) { 453 int offset; 454 455 offset = pLookup->table[idx].classDescriptorOffset; 456 if (offset == 0) 457 return NULL; 458 459 if (pLookup->table[idx].classDescriptorHash == hash) { 460 const char* str; 461 462 str = (const char*) (pDexFile->baseAddr + offset); 463 if (strcmp(str, descriptor) == 0) { 464 return (const DexClassDef*) 465 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 466 } 467 } 468 469 idx = (idx + 1) & mask; 470 } 471 } 472 473 474 /* 475 * Compute the DEX file checksum for a memory-mapped DEX file. 476 */ 477 u4 dexComputeChecksum(const DexHeader* pHeader) 478 { 479 const u1* start = (const u1*) pHeader; 480 481 uLong adler = adler32(0L, Z_NULL, 0); 482 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 483 484 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 485 } 486 487 /* 488 * Compute the size, in bytes, of a DexCode. 489 */ 490 size_t dexGetDexCodeSize(const DexCode* pCode) 491 { 492 /* 493 * The catch handler data is the last entry. It has a variable number 494 * of variable-size pieces, so we need to create an iterator. 495 */ 496 u4 handlersSize; 497 u4 offset; 498 u4 ui; 499 500 if (pCode->triesSize != 0) { 501 handlersSize = dexGetHandlersSize(pCode); 502 offset = dexGetFirstHandlerOffset(pCode); 503 } else { 504 handlersSize = 0; 505 offset = 0; 506 } 507 508 for (ui = 0; ui < handlersSize; ui++) { 509 DexCatchIterator iterator; 510 dexCatchIteratorInit(&iterator, pCode, offset); 511 offset = dexCatchIteratorGetEndOffset(&iterator, pCode); 512 } 513 514 const u1* handlerData = dexGetCatchHandlerData(pCode); 515 516 //ALOGD("+++ pCode=%p handlerData=%p last offset=%d", 517 // pCode, handlerData, offset); 518 519 /* return the size of the catch handler + everything before it */ 520 return (handlerData - (u1*) pCode) + offset; 521 } 522 523 /* 524 * Round up to the next highest power of 2. 525 * 526 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 527 */ 528 u4 dexRoundUpPower2(u4 val) 529 { 530 val--; 531 val |= val >> 1; 532 val |= val >> 2; 533 val |= val >> 4; 534 val |= val >> 8; 535 val |= val >> 16; 536 val++; 537 538 return val; 539 } 540