1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Access the contents of a .dex file. 19 */ 20 21 #include "DexFile.h" 22 #include "DexOptData.h" 23 #include "DexProto.h" 24 #include "DexCatch.h" 25 #include "Leb128.h" 26 #include "sha1.h" 27 #include "ZipArchive.h" 28 29 #include <zlib.h> 30 31 #include <stdlib.h> 32 #include <stddef.h> 33 #include <string.h> 34 #include <fcntl.h> 35 #include <errno.h> 36 37 38 /* 39 * Verifying checksums is good, but it slows things down and causes us to 40 * touch every page. In the "optimized" world, it doesn't work at all, 41 * because we rewrite the contents. 42 */ 43 static const bool kVerifyChecksum = false; 44 static const bool kVerifySignature = false; 45 46 /* (documented in header) */ 47 char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) { 48 const char* string = dexGetPrimitiveTypeDescriptor(type); 49 50 return (string == NULL) ? '\0' : string[0]; 51 } 52 53 /* (documented in header) */ 54 const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) { 55 switch (type) { 56 case PRIM_VOID: return "V"; 57 case PRIM_BOOLEAN: return "Z"; 58 case PRIM_BYTE: return "B"; 59 case PRIM_SHORT: return "S"; 60 case PRIM_CHAR: return "C"; 61 case PRIM_INT: return "I"; 62 case PRIM_LONG: return "J"; 63 case PRIM_FLOAT: return "F"; 64 case PRIM_DOUBLE: return "D"; 65 default: return NULL; 66 } 67 68 return NULL; 69 } 70 71 /* (documented in header) */ 72 const char* dexGetBoxedTypeDescriptor(PrimitiveType type) { 73 switch (type) { 74 case PRIM_VOID: return NULL; 75 case PRIM_BOOLEAN: return "Ljava/lang/Boolean;"; 76 case PRIM_BYTE: return "Ljava/lang/Byte;"; 77 case PRIM_SHORT: return "Ljava/lang/Short;"; 78 case PRIM_CHAR: return "Ljava/lang/Character;"; 79 case PRIM_INT: return "Ljava/lang/Integer;"; 80 case PRIM_LONG: return "Ljava/lang/Long;"; 81 case PRIM_FLOAT: return "Ljava/lang/Float;"; 82 case PRIM_DOUBLE: return "Ljava/lang/Double;"; 83 default: return NULL; 84 } 85 } 86 87 /* (documented in header) */ 88 PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) { 89 switch (descriptorChar) { 90 case 'V': return PRIM_VOID; 91 case 'Z': return PRIM_BOOLEAN; 92 case 'B': return PRIM_BYTE; 93 case 'S': return PRIM_SHORT; 94 case 'C': return PRIM_CHAR; 95 case 'I': return PRIM_INT; 96 case 'J': return PRIM_LONG; 97 case 'F': return PRIM_FLOAT; 98 case 'D': return PRIM_DOUBLE; 99 default: return PRIM_NOT; 100 } 101 } 102 103 /* Return the UTF-8 encoded string with the specified string_id index, 104 * also filling in the UTF-16 size (number of 16-bit code points).*/ 105 const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 106 u4* utf16Size) { 107 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 108 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 109 110 *utf16Size = readUnsignedLeb128(&ptr); 111 return (const char*) ptr; 112 } 113 114 /* 115 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 116 * least kSHA1DigestOutputLen bytes. 117 */ 118 const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 119 120 /* 121 * Compute a SHA-1 digest on a range of bytes. 122 */ 123 static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 124 unsigned char digest[]) 125 { 126 SHA1_CTX context; 127 SHA1Init(&context); 128 SHA1Update(&context, data, length); 129 SHA1Final(digest, &context); 130 } 131 132 /* 133 * Format the SHA-1 digest into the buffer, which must be able to hold at 134 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 135 */ 136 static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 137 { 138 static const char hexDigit[] = "0123456789abcdef"; 139 char* cp; 140 int i; 141 142 cp = tmpBuf; 143 for (i = 0; i < kSHA1DigestLen; i++) { 144 *cp++ = hexDigit[digest[i] >> 4]; 145 *cp++ = hexDigit[digest[i] & 0x0f]; 146 } 147 *cp++ = '\0'; 148 149 assert(cp == tmpBuf + kSHA1DigestOutputLen); 150 151 return tmpBuf; 152 } 153 154 /* 155 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 156 * 157 * This may or may not be compatible with UTF-8 hash functions used inside 158 * the Dalvik VM. 159 * 160 * The basic "multiply by 31 and add" approach does better on class names 161 * than most other things tried (e.g. adler32). 162 */ 163 static u4 classDescriptorHash(const char* str) 164 { 165 u4 hash = 1; 166 167 while (*str != '\0') 168 hash = hash * 31 + *str++; 169 170 return hash; 171 } 172 173 /* 174 * Add an entry to the class lookup table. We hash the string and probe 175 * until we find an open slot. 176 */ 177 static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 178 int stringOff, int classDefOff, int* pNumProbes) 179 { 180 const char* classDescriptor = 181 (const char*) (pDexFile->baseAddr + stringOff); 182 const DexClassDef* pClassDef = 183 (const DexClassDef*) (pDexFile->baseAddr + classDefOff); 184 u4 hash = classDescriptorHash(classDescriptor); 185 int mask = pLookup->numEntries-1; 186 int idx = hash & mask; 187 188 /* 189 * Find the first empty slot. We oversized the table, so this is 190 * guaranteed to finish. 191 */ 192 int probes = 0; 193 while (pLookup->table[idx].classDescriptorOffset != 0) { 194 idx = (idx + 1) & mask; 195 probes++; 196 } 197 //if (probes > 1) 198 // LOGW("classLookupAdd: probes=%d", probes); 199 200 pLookup->table[idx].classDescriptorHash = hash; 201 pLookup->table[idx].classDescriptorOffset = stringOff; 202 pLookup->table[idx].classDefOffset = classDefOff; 203 *pNumProbes = probes; 204 } 205 206 /* 207 * Create the class lookup hash table. 208 * 209 * Returns newly-allocated storage. 210 */ 211 DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 212 { 213 DexClassLookup* pLookup; 214 int allocSize; 215 int i, numEntries; 216 int numProbes, totalProbes, maxProbes; 217 218 numProbes = totalProbes = maxProbes = 0; 219 220 assert(pDexFile != NULL); 221 222 /* 223 * Using a factor of 3 results in far less probing than a factor of 2, 224 * but almost doubles the flash storage requirements for the bootstrap 225 * DEX files. The overall impact on class loading performance seems 226 * to be minor. We could probably get some performance improvement by 227 * using a secondary hash. 228 */ 229 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 230 allocSize = offsetof(DexClassLookup, table) 231 + numEntries * sizeof(pLookup->table[0]); 232 233 pLookup = (DexClassLookup*) calloc(1, allocSize); 234 if (pLookup == NULL) 235 return NULL; 236 pLookup->size = allocSize; 237 pLookup->numEntries = numEntries; 238 239 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 240 const DexClassDef* pClassDef; 241 const char* pString; 242 243 pClassDef = dexGetClassDef(pDexFile, i); 244 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 245 246 classLookupAdd(pDexFile, pLookup, 247 (u1*)pString - pDexFile->baseAddr, 248 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 249 250 if (numProbes > maxProbes) 251 maxProbes = numProbes; 252 totalProbes += numProbes; 253 } 254 255 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 256 " total=%d max=%d", 257 pDexFile->pHeader->classDefsSize, numEntries, 258 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 259 allocSize, totalProbes, maxProbes); 260 261 return pLookup; 262 } 263 264 265 /* 266 * Set up the basic raw data pointers of a DexFile. This function isn't 267 * meant for general use. 268 */ 269 void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 270 DexHeader *pHeader = (DexHeader*) data; 271 272 pDexFile->baseAddr = data; 273 pDexFile->pHeader = pHeader; 274 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 275 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 276 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 277 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 278 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 279 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 280 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 281 } 282 283 /* 284 * Parse an optimized or unoptimized .dex file sitting in memory. This is 285 * called after the byte-ordering and structure alignment has been fixed up. 286 * 287 * On success, return a newly-allocated DexFile. 288 */ 289 DexFile* dexFileParse(const u1* data, size_t length, int flags) 290 { 291 DexFile* pDexFile = NULL; 292 const DexHeader* pHeader; 293 const u1* magic; 294 int result = -1; 295 296 if (length < sizeof(DexHeader)) { 297 LOGE("too short to be a valid .dex"); 298 goto bail; /* bad file format */ 299 } 300 301 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 302 if (pDexFile == NULL) 303 goto bail; /* alloc failure */ 304 memset(pDexFile, 0, sizeof(DexFile)); 305 306 /* 307 * Peel off the optimized header. 308 */ 309 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 310 magic = data; 311 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 312 LOGE("bad opt version (0x%02x %02x %02x %02x)", 313 magic[4], magic[5], magic[6], magic[7]); 314 goto bail; 315 } 316 317 pDexFile->pOptHeader = (const DexOptHeader*) data; 318 LOGV("Good opt header, DEX offset is %d, flags=0x%02x", 319 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 320 321 /* parse the optimized dex file tables */ 322 if (!dexParseOptData(data, length, pDexFile)) 323 goto bail; 324 325 /* ignore the opt header and appended data from here on out */ 326 data += pDexFile->pOptHeader->dexOffset; 327 length -= pDexFile->pOptHeader->dexOffset; 328 if (pDexFile->pOptHeader->dexLength > length) { 329 LOGE("File truncated? stored len=%d, rem len=%d", 330 pDexFile->pOptHeader->dexLength, (int) length); 331 goto bail; 332 } 333 length = pDexFile->pOptHeader->dexLength; 334 } 335 336 dexFileSetupBasicPointers(pDexFile, data); 337 pHeader = pDexFile->pHeader; 338 339 if (!dexHasValidMagic(pHeader)) { 340 goto bail; 341 } 342 343 /* 344 * Verify the checksum(s). This is reasonably quick, but does require 345 * touching every byte in the DEX file. The base checksum changes after 346 * byte-swapping and DEX optimization. 347 */ 348 if (flags & kDexParseVerifyChecksum) { 349 u4 adler = dexComputeChecksum(pHeader); 350 if (adler != pHeader->checksum) { 351 LOGE("ERROR: bad checksum (%08x vs %08x)", 352 adler, pHeader->checksum); 353 if (!(flags & kDexParseContinueOnError)) 354 goto bail; 355 } else { 356 LOGV("+++ adler32 checksum (%08x) verified", adler); 357 } 358 359 const DexOptHeader* pOptHeader = pDexFile->pOptHeader; 360 if (pOptHeader != NULL) { 361 adler = dexComputeOptChecksum(pOptHeader); 362 if (adler != pOptHeader->checksum) { 363 LOGE("ERROR: bad opt checksum (%08x vs %08x)", 364 adler, pOptHeader->checksum); 365 if (!(flags & kDexParseContinueOnError)) 366 goto bail; 367 } else { 368 LOGV("+++ adler32 opt checksum (%08x) verified", adler); 369 } 370 } 371 } 372 373 /* 374 * Verify the SHA-1 digest. (Normally we don't want to do this -- 375 * the digest is used to uniquely identify the original DEX file, and 376 * can't be computed for verification after the DEX is byte-swapped 377 * and optimized.) 378 */ 379 if (kVerifySignature) { 380 unsigned char sha1Digest[kSHA1DigestLen]; 381 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 382 kSHA1DigestLen; 383 384 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 385 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 386 char tmpBuf1[kSHA1DigestOutputLen]; 387 char tmpBuf2[kSHA1DigestOutputLen]; 388 LOGE("ERROR: bad SHA1 digest (%s vs %s)", 389 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 390 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 391 if (!(flags & kDexParseContinueOnError)) 392 goto bail; 393 } else { 394 LOGV("+++ sha1 digest verified"); 395 } 396 } 397 398 if (pHeader->fileSize != length) { 399 LOGE("ERROR: stored file size (%d) != expected (%d)", 400 (int) pHeader->fileSize, (int) length); 401 if (!(flags & kDexParseContinueOnError)) 402 goto bail; 403 } 404 405 if (pHeader->classDefsSize == 0) { 406 LOGE("ERROR: DEX file has no classes in it, failing"); 407 goto bail; 408 } 409 410 /* 411 * Success! 412 */ 413 result = 0; 414 415 bail: 416 if (result != 0 && pDexFile != NULL) { 417 dexFileFree(pDexFile); 418 pDexFile = NULL; 419 } 420 return pDexFile; 421 } 422 423 /* 424 * Free up the DexFile and any associated data structures. 425 * 426 * Note we may be called with a partially-initialized DexFile. 427 */ 428 void dexFileFree(DexFile* pDexFile) 429 { 430 if (pDexFile == NULL) 431 return; 432 433 free(pDexFile); 434 } 435 436 /* 437 * Look up a class definition entry by descriptor. 438 * 439 * "descriptor" should look like "Landroid/debug/Stuff;". 440 */ 441 const DexClassDef* dexFindClass(const DexFile* pDexFile, 442 const char* descriptor) 443 { 444 const DexClassLookup* pLookup = pDexFile->pClassLookup; 445 u4 hash; 446 int idx, mask; 447 448 hash = classDescriptorHash(descriptor); 449 mask = pLookup->numEntries - 1; 450 idx = hash & mask; 451 452 /* 453 * Search until we find a matching entry or an empty slot. 454 */ 455 while (true) { 456 int offset; 457 458 offset = pLookup->table[idx].classDescriptorOffset; 459 if (offset == 0) 460 return NULL; 461 462 if (pLookup->table[idx].classDescriptorHash == hash) { 463 const char* str; 464 465 str = (const char*) (pDexFile->baseAddr + offset); 466 if (strcmp(str, descriptor) == 0) { 467 return (const DexClassDef*) 468 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 469 } 470 } 471 472 idx = (idx + 1) & mask; 473 } 474 } 475 476 477 /* 478 * Compute the DEX file checksum for a memory-mapped DEX file. 479 */ 480 u4 dexComputeChecksum(const DexHeader* pHeader) 481 { 482 const u1* start = (const u1*) pHeader; 483 484 uLong adler = adler32(0L, Z_NULL, 0); 485 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 486 487 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 488 } 489 490 /* 491 * Compute the size, in bytes, of a DexCode. 492 */ 493 size_t dexGetDexCodeSize(const DexCode* pCode) 494 { 495 /* 496 * The catch handler data is the last entry. It has a variable number 497 * of variable-size pieces, so we need to create an iterator. 498 */ 499 u4 handlersSize; 500 u4 offset; 501 u4 ui; 502 503 if (pCode->triesSize != 0) { 504 handlersSize = dexGetHandlersSize(pCode); 505 offset = dexGetFirstHandlerOffset(pCode); 506 } else { 507 handlersSize = 0; 508 offset = 0; 509 } 510 511 for (ui = 0; ui < handlersSize; ui++) { 512 DexCatchIterator iterator; 513 dexCatchIteratorInit(&iterator, pCode, offset); 514 offset = dexCatchIteratorGetEndOffset(&iterator, pCode); 515 } 516 517 const u1* handlerData = dexGetCatchHandlerData(pCode); 518 519 //LOGD("+++ pCode=%p handlerData=%p last offset=%d", 520 // pCode, handlerData, offset); 521 522 /* return the size of the catch handler + everything before it */ 523 return (handlerData - (u1*) pCode) + offset; 524 } 525 526 /* 527 * Round up to the next highest power of 2. 528 * 529 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 530 */ 531 u4 dexRoundUpPower2(u4 val) 532 { 533 val--; 534 val |= val >> 1; 535 val |= val >> 2; 536 val |= val >> 4; 537 val |= val >> 8; 538 val |= val >> 16; 539 val++; 540 541 return val; 542 } 543