Home | History | Annotate | Download | only in libdex
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 /*
     17  * Read-only access to Zip archives, with minimal heap allocation.
     18  */
     19 #include "ZipArchive.h"
     20 
     21 #include <zlib.h>
     22 
     23 #include <stdlib.h>
     24 #include <string.h>
     25 #include <fcntl.h>
     26 #include <errno.h>
     27 
     28 
     29 /*
     30  * Zip file constants.
     31  */
     32 #define kEOCDSignature      0x06054b50
     33 #define kEOCDLen            22
     34 #define kEOCDNumEntries     8               // offset to #of entries in file
     35 #define kEOCDFileOffset     16              // offset to central directory
     36 
     37 #define kMaxCommentLen      65535           // longest possible in ushort
     38 #define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)
     39 
     40 #define kLFHSignature       0x04034b50
     41 #define kLFHLen             30              // excluding variable-len fields
     42 #define kLFHNameLen         26              // offset to filename length
     43 #define kLFHExtraLen        28              // offset to extra length
     44 
     45 #define kCDESignature       0x02014b50
     46 #define kCDELen             46              // excluding variable-len fields
     47 #define kCDEMethod          10              // offset to compression method
     48 #define kCDEModWhen         12              // offset to modification timestamp
     49 #define kCDECRC             16              // offset to entry CRC
     50 #define kCDECompLen         20              // offset to compressed length
     51 #define kCDEUncompLen       24              // offset to uncompressed length
     52 #define kCDENameLen         28              // offset to filename length
     53 #define kCDEExtraLen        30              // offset to extra length
     54 #define kCDECommentLen      32              // offset to comment length
     55 #define kCDELocalOffset     42              // offset to local hdr
     56 
     57 /*
     58  * The values we return for ZipEntry use 0 as an invalid value, so we
     59  * want to adjust the hash table index by a fixed amount.  Using a large
     60  * value helps ensure that people don't mix & match arguments, e.g. with
     61  * entry indices.
     62  */
     63 #define kZipEntryAdj        10000
     64 
     65 /*
     66  * Convert a ZipEntry to a hash table index, verifying that it's in a
     67  * valid range.
     68  */
     69 static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
     70 {
     71     long ent = ((long) entry) - kZipEntryAdj;
     72     if (ent < 0 || ent >= pArchive->mHashTableSize ||
     73         pArchive->mHashTable[ent].name == NULL)
     74     {
     75         LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent);
     76         return -1;
     77     }
     78     return ent;
     79 }
     80 
     81 /*
     82  * Simple string hash function for non-null-terminated strings.
     83  */
     84 static unsigned int computeHash(const char* str, int len)
     85 {
     86     unsigned int hash = 0;
     87 
     88     while (len--)
     89         hash = hash * 31 + *str++;
     90 
     91     return hash;
     92 }
     93 
     94 /*
     95  * Add a new entry to the hash table.
     96  */
     97 static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
     98     unsigned int hash)
     99 {
    100     const int hashTableSize = pArchive->mHashTableSize;
    101     int ent = hash & (hashTableSize - 1);
    102 
    103     /*
    104      * We over-allocated the table, so we're guaranteed to find an empty slot.
    105      */
    106     while (pArchive->mHashTable[ent].name != NULL)
    107         ent = (ent + 1) & (hashTableSize-1);
    108 
    109     pArchive->mHashTable[ent].name = str;
    110     pArchive->mHashTable[ent].nameLen = strLen;
    111 }
    112 
    113 /*
    114  * Get 2 little-endian bytes.
    115  */
    116 static u2 get2LE(unsigned char const* pSrc)
    117 {
    118     return pSrc[0] | (pSrc[1] << 8);
    119 }
    120 
    121 /*
    122  * Get 4 little-endian bytes.
    123  */
    124 static u4 get4LE(unsigned char const* pSrc)
    125 {
    126     u4 result;
    127 
    128     result = pSrc[0];
    129     result |= pSrc[1] << 8;
    130     result |= pSrc[2] << 16;
    131     result |= pSrc[3] << 24;
    132 
    133     return result;
    134 }
    135 
    136 /*
    137  * Parse the Zip archive, verifying its contents and initializing internal
    138  * data structures.
    139  */
    140 static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
    141 {
    142 #define CHECK_OFFSET(_off) {                                                \
    143         if ((unsigned int) (_off) >= maxOffset) {                           \
    144             LOGE("ERROR: bad offset %u (max %d): %s\n",                     \
    145                 (unsigned int) (_off), maxOffset, #_off);                   \
    146             goto bail;                                                      \
    147         }                                                                   \
    148     }
    149     bool result = false;
    150     const unsigned char* basePtr = (const unsigned char*)pMap->addr;
    151     const unsigned char* ptr;
    152     size_t length = pMap->length;
    153     unsigned int i, numEntries, cdOffset;
    154     unsigned int val;
    155 
    156     /*
    157      * The first 4 bytes of the file will either be the local header
    158      * signature for the first file (kLFHSignature) or, if the archive doesn't
    159      * have any files in it, the end-of-central-directory signature
    160      * (kEOCDSignature).
    161      */
    162     val = get4LE(basePtr);
    163     if (val == kEOCDSignature) {
    164         LOGI("Found Zip archive, but it looks empty\n");
    165         goto bail;
    166     } else if (val != kLFHSignature) {
    167         LOGV("Not a Zip archive (found 0x%08x)\n", val);
    168         goto bail;
    169     }
    170 
    171     /*
    172      * Find the EOCD.  We'll find it immediately unless they have a file
    173      * comment.
    174      */
    175     ptr = basePtr + length - kEOCDLen;
    176 
    177     while (ptr >= basePtr) {
    178         if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature)
    179             break;
    180         ptr--;
    181     }
    182     if (ptr < basePtr) {
    183         LOGI("Could not find end-of-central-directory in Zip\n");
    184         goto bail;
    185     }
    186 
    187     /*
    188      * There are two interesting items in the EOCD block: the number of
    189      * entries in the file, and the file offset of the start of the
    190      * central directory.
    191      *
    192      * (There's actually a count of the #of entries in this file, and for
    193      * all files which comprise a spanned archive, but for our purposes
    194      * we're only interested in the current file.  Besides, we expect the
    195      * two to be equivalent for our stuff.)
    196      */
    197     numEntries = get2LE(ptr + kEOCDNumEntries);
    198     cdOffset = get4LE(ptr + kEOCDFileOffset);
    199 
    200     /* valid offsets are [0,EOCD] */
    201     unsigned int maxOffset;
    202     maxOffset = (ptr - basePtr) +1;
    203 
    204     LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
    205     if (numEntries == 0 || cdOffset >= length) {
    206         LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
    207             numEntries, cdOffset, length);
    208         goto bail;
    209     }
    210 
    211     /*
    212      * Create hash table.  We have a minimum 75% load factor, possibly as
    213      * low as 50% after we round off to a power of 2.  There must be at
    214      * least one unused entry to avoid an infinite loop during creation.
    215      */
    216     pArchive->mNumEntries = numEntries;
    217     pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
    218     pArchive->mHashTable = (ZipHashEntry*)
    219             calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
    220 
    221     /*
    222      * Walk through the central directory, adding entries to the hash
    223      * table.
    224      */
    225     ptr = basePtr + cdOffset;
    226     for (i = 0; i < numEntries; i++) {
    227         unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
    228         const unsigned char* localHdr;
    229         unsigned int hash;
    230 
    231         if (get4LE(ptr) != kCDESignature) {
    232             LOGW("Missed a central dir sig (at %d)\n", i);
    233             goto bail;
    234         }
    235         if (ptr + kCDELen > basePtr + length) {
    236             LOGW("Ran off the end (at %d)\n", i);
    237             goto bail;
    238         }
    239 
    240         localHdrOffset = get4LE(ptr + kCDELocalOffset);
    241         CHECK_OFFSET(localHdrOffset);
    242         fileNameLen = get2LE(ptr + kCDENameLen);
    243         extraLen = get2LE(ptr + kCDEExtraLen);
    244         commentLen = get2LE(ptr + kCDECommentLen);
    245 
    246         //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n",
    247         //    i, localHdrOffset, fileNameLen, extraLen, commentLen);
    248         //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen);
    249 
    250         /* add the CDE filename to the hash table */
    251         hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
    252         addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
    253 
    254         localHdr = basePtr + localHdrOffset;
    255         if (get4LE(localHdr) != kLFHSignature) {
    256             LOGW("Bad offset to local header: %d (at %d)\n",
    257                 localHdrOffset, i);
    258             goto bail;
    259         }
    260 
    261         ptr += kCDELen + fileNameLen + extraLen + commentLen;
    262         CHECK_OFFSET(ptr - basePtr);
    263     }
    264 
    265     result = true;
    266 
    267 bail:
    268     return result;
    269 #undef CHECK_OFFSET
    270 }
    271 
    272 /*
    273  * Open the specified file read-only.  We memory-map the entire thing and
    274  * parse the contents.
    275  *
    276  * This will be called on non-Zip files, especially during VM startup, so
    277  * we don't want to be too noisy about certain types of failure.  (Do
    278  * we want a "quiet" flag?)
    279  *
    280  * On success, we fill out the contents of "pArchive" and return 0.
    281  */
    282 int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
    283 {
    284     int fd, err;
    285 
    286     LOGV("Opening archive '%s' %p\n", fileName, pArchive);
    287 
    288     memset(pArchive, 0, sizeof(ZipArchive));
    289 
    290     fd = open(fileName, O_RDONLY, 0);
    291     if (fd < 0) {
    292         err = errno ? errno : -1;
    293         LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
    294         return err;
    295     }
    296 
    297     return dexZipPrepArchive(fd, fileName, pArchive);
    298 }
    299 
    300 /*
    301  * Prepare to access a ZipArchive in an open file descriptor.
    302  */
    303 int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
    304 {
    305     MemMapping map;
    306     int err;
    307 
    308     map.addr = NULL;
    309     memset(pArchive, 0, sizeof(*pArchive));
    310 
    311     pArchive->mFd = fd;
    312 
    313     if (sysMapFileInShmemReadOnly(pArchive->mFd, &map) != 0) {
    314         err = -1;
    315         LOGW("Map of '%s' failed\n", debugFileName);
    316         goto bail;
    317     }
    318 
    319     if (map.length < kEOCDLen) {
    320         err = -1;
    321         LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length);
    322         goto bail;
    323     }
    324 
    325     if (!parseZipArchive(pArchive, &map)) {
    326         err = -1;
    327         LOGV("Parsing '%s' failed\n", debugFileName);
    328         goto bail;
    329     }
    330 
    331     /* success */
    332     err = 0;
    333     sysCopyMap(&pArchive->mMap, &map);
    334     map.addr = NULL;
    335 
    336 bail:
    337     if (err != 0)
    338         dexZipCloseArchive(pArchive);
    339     if (map.addr != NULL)
    340         sysReleaseShmem(&map);
    341     return err;
    342 }
    343 
    344 
    345 /*
    346  * Close a ZipArchive, closing the file and freeing the contents.
    347  *
    348  * NOTE: the ZipArchive may not have been fully created.
    349  */
    350 void dexZipCloseArchive(ZipArchive* pArchive)
    351 {
    352     LOGV("Closing archive %p\n", pArchive);
    353 
    354     if (pArchive->mFd >= 0)
    355         close(pArchive->mFd);
    356 
    357     sysReleaseShmem(&pArchive->mMap);
    358 
    359     free(pArchive->mHashTable);
    360 
    361     pArchive->mFd = -1;
    362     pArchive->mNumEntries = -1;
    363     pArchive->mHashTableSize = -1;
    364     pArchive->mHashTable = NULL;
    365 }
    366 
    367 
    368 /*
    369  * Find a matching entry.
    370  *
    371  * Returns 0 if not found.
    372  */
    373 ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
    374 {
    375     int nameLen = strlen(entryName);
    376     unsigned int hash = computeHash(entryName, nameLen);
    377     const int hashTableSize = pArchive->mHashTableSize;
    378     int ent = hash & (hashTableSize-1);
    379 
    380     while (pArchive->mHashTable[ent].name != NULL) {
    381         if (pArchive->mHashTable[ent].nameLen == nameLen &&
    382             memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
    383         {
    384             /* match */
    385             return (ZipEntry) (ent + kZipEntryAdj);
    386         }
    387 
    388         ent = (ent + 1) & (hashTableSize-1);
    389     }
    390 
    391     return NULL;
    392 }
    393 
    394 #if 0
    395 /*
    396  * Find the Nth entry.
    397  *
    398  * This currently involves walking through the sparse hash table, counting
    399  * non-empty entries.  If we need to speed this up we can either allocate
    400  * a parallel lookup table or (perhaps better) provide an iterator interface.
    401  */
    402 ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
    403 {
    404     if (idx < 0 || idx >= pArchive->mNumEntries) {
    405         LOGW("Invalid index %d\n", idx);
    406         return NULL;
    407     }
    408 
    409     int ent;
    410     for (ent = 0; ent < pArchive->mHashTableSize; ent++) {
    411         if (pArchive->mHashTable[ent].name != NULL) {
    412             if (idx-- == 0)
    413                 return (ZipEntry) (ent + kZipEntryAdj);
    414         }
    415     }
    416 
    417     return NULL;
    418 }
    419 #endif
    420 
    421 /*
    422  * Get the useful fields from the zip entry.
    423  *
    424  * Returns "false" if the offsets to the fields or the contents of the fields
    425  * appear to be bogus.
    426  */
    427 bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
    428     int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
    429     long* pModWhen, long* pCrc32)
    430 {
    431     int ent = entryToIndex(pArchive, entry);
    432     if (ent < 0)
    433         return false;
    434 
    435     /*
    436      * Recover the start of the central directory entry from the filename
    437      * pointer.
    438      */
    439     const unsigned char* basePtr = (const unsigned char*)
    440         pArchive->mMap.addr;
    441     const unsigned char* ptr = (const unsigned char*)
    442         pArchive->mHashTable[ent].name;
    443     size_t zipLength =
    444         pArchive->mMap.length;
    445 
    446     ptr -= kCDELen;
    447 
    448     int method = get2LE(ptr + kCDEMethod);
    449     if (pMethod != NULL)
    450         *pMethod = method;
    451 
    452     if (pModWhen != NULL)
    453         *pModWhen = get4LE(ptr + kCDEModWhen);
    454     if (pCrc32 != NULL)
    455         *pCrc32 = get4LE(ptr + kCDECRC);
    456 
    457     /*
    458      * We need to make sure that the lengths are not so large that somebody
    459      * trying to map the compressed or uncompressed data runs off the end
    460      * of the mapped region.
    461      */
    462     unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset);
    463     if (localHdrOffset + kLFHLen >= zipLength) {
    464         LOGE("ERROR: bad local hdr offset in zip\n");
    465         return false;
    466     }
    467     const unsigned char* localHdr = basePtr + localHdrOffset;
    468     off_t dataOffset = localHdrOffset + kLFHLen
    469         + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen);
    470     if ((unsigned long) dataOffset >= zipLength) {
    471         LOGE("ERROR: bad data offset in zip\n");
    472         return false;
    473     }
    474 
    475     if (pCompLen != NULL) {
    476         *pCompLen = get4LE(ptr + kCDECompLen);
    477         if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) {
    478             LOGE("ERROR: bad compressed length in zip\n");
    479             return false;
    480         }
    481     }
    482     if (pUncompLen != NULL) {
    483         *pUncompLen = get4LE(ptr + kCDEUncompLen);
    484         if (*pUncompLen < 0) {
    485             LOGE("ERROR: negative uncompressed length in zip\n");
    486             return false;
    487         }
    488         if (method == kCompressStored &&
    489             (size_t)(dataOffset + *pUncompLen) >= zipLength)
    490         {
    491             LOGE("ERROR: bad uncompressed length in zip\n");
    492             return false;
    493         }
    494     }
    495 
    496     if (pOffset != NULL) {
    497         *pOffset = dataOffset;
    498     }
    499     return true;
    500 }
    501 
    502 /*
    503  * Uncompress "deflate" data from one buffer to an open file descriptor.
    504  */
    505 static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
    506     long compLen)
    507 {
    508     bool result = false;
    509     const int kWriteBufSize = 32768;
    510     unsigned char writeBuf[kWriteBufSize];
    511     z_stream zstream;
    512     int zerr;
    513 
    514     /*
    515      * Initialize the zlib stream struct.
    516      */
    517 	memset(&zstream, 0, sizeof(zstream));
    518     zstream.zalloc = Z_NULL;
    519     zstream.zfree = Z_NULL;
    520     zstream.opaque = Z_NULL;
    521     zstream.next_in = (Bytef*)inBuf;
    522     zstream.avail_in = compLen;
    523     zstream.next_out = (Bytef*) writeBuf;
    524     zstream.avail_out = sizeof(writeBuf);
    525     zstream.data_type = Z_UNKNOWN;
    526 
    527 	/*
    528 	 * Use the undocumented "negative window bits" feature to tell zlib
    529 	 * that there's no zlib header waiting for it.
    530 	 */
    531     zerr = inflateInit2(&zstream, -MAX_WBITS);
    532     if (zerr != Z_OK) {
    533         if (zerr == Z_VERSION_ERROR) {
    534             LOGE("Installed zlib is not compatible with linked version (%s)\n",
    535                 ZLIB_VERSION);
    536         } else {
    537             LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
    538         }
    539         goto bail;
    540     }
    541 
    542     /*
    543      * Loop while we have more to do.
    544      */
    545     do {
    546         /*
    547          * Expand data.
    548          */
    549         zerr = inflate(&zstream, Z_NO_FLUSH);
    550         if (zerr != Z_OK && zerr != Z_STREAM_END) {
    551             LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
    552                 zerr, zstream.next_in, zstream.avail_in,
    553                 zstream.next_out, zstream.avail_out);
    554             goto z_bail;
    555         }
    556 
    557         /* write when we're full or when we're done */
    558         if (zstream.avail_out == 0 ||
    559             (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
    560         {
    561             long writeSize = zstream.next_out - writeBuf;
    562             int cc = write(fd, writeBuf, writeSize);
    563             if (cc != (int) writeSize) {
    564                 if (cc < 0) {
    565                     LOGW("write failed in inflate: %s\n", strerror(errno));
    566                 } else {
    567                     LOGW("partial write in inflate (%d vs %ld)\n",
    568                         cc, writeSize);
    569                 }
    570                 goto z_bail;
    571             }
    572 
    573             zstream.next_out = writeBuf;
    574             zstream.avail_out = sizeof(writeBuf);
    575         }
    576     } while (zerr == Z_OK);
    577 
    578     assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
    579 
    580     /* paranoia */
    581     if ((long) zstream.total_out != uncompLen) {
    582         LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
    583             zstream.total_out, uncompLen);
    584         goto z_bail;
    585     }
    586 
    587     result = true;
    588 
    589 z_bail:
    590     inflateEnd(&zstream);        /* free up any allocated structures */
    591 
    592 bail:
    593     return result;
    594 }
    595 
    596 /*
    597  * Uncompress an entry, in its entirety, to an open file descriptor.
    598  *
    599  * TODO: this doesn't verify the data's CRC, but probably should (especially
    600  * for uncompressed data).
    601  */
    602 bool dexZipExtractEntryToFile(const ZipArchive* pArchive,
    603     const ZipEntry entry, int fd)
    604 {
    605     bool result = false;
    606     int ent = entryToIndex(pArchive, entry);
    607     if (ent < 0)
    608         return -1;
    609 
    610     const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr;
    611     int method;
    612     long uncompLen, compLen;
    613     off_t offset;
    614 
    615     if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
    616             &offset, NULL, NULL))
    617     {
    618         goto bail;
    619     }
    620 
    621     if (method == kCompressStored) {
    622         ssize_t actual;
    623 
    624         actual = write(fd, basePtr + offset, uncompLen);
    625         if (actual < 0) {
    626             LOGE("Write failed: %s\n", strerror(errno));
    627             goto bail;
    628         } else if (actual != uncompLen) {
    629             LOGE("Partial write during uncompress (%d of %ld)\n",
    630                 (int) actual, uncompLen);
    631             goto bail;
    632         } else {
    633             LOGI("+++ successful write\n");
    634         }
    635     } else {
    636         if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen))
    637             goto bail;
    638     }
    639 
    640     result = true;
    641 
    642 bail:
    643     return result;
    644 }
    645 
    646