Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //
     18 // Read-only access to Zip archives, with minimal heap allocation.
     19 //
     20 #define LOG_TAG "zipro"
     21 //#define LOG_NDEBUG 0
     22 #include <utils/Log.h>
     23 #include <utils/ZipFileRO.h>
     24 #include <utils/misc.h>
     25 #include <utils/threads.h>
     26 
     27 #include <zlib.h>
     28 
     29 #include <string.h>
     30 #include <fcntl.h>
     31 #include <errno.h>
     32 #include <assert.h>
     33 #include <unistd.h>
     34 
     35 #if HAVE_PRINTF_ZD
     36 #  define ZD "%zd"
     37 #  define ZD_TYPE ssize_t
     38 #else
     39 #  define ZD "%ld"
     40 #  define ZD_TYPE long
     41 #endif
     42 
     43 /*
     44  * We must open binary files using open(path, ... | O_BINARY) under Windows.
     45  * Otherwise strange read errors will happen.
     46  */
     47 #ifndef O_BINARY
     48 #  define O_BINARY  0
     49 #endif
     50 
/*
 * TEMP_FAILURE_RETRY is defined by some, but not all, versions of
 * <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's
 * not already defined, then define it here.
 *
 * The fallback is written as a GNU statement expression using "typeof",
 * so it needs a compiler that supports those extensions.  The argument
 * expression is re-evaluated on every retry, and the value of the final
 * evaluation is the value of the whole macro.
 */
#ifndef TEMP_FAILURE_RETRY
/* Used to retry syscalls that can return EINTR. */
#define TEMP_FAILURE_RETRY(exp) ({         \
    typeof (exp) _rc;                      \
    do {                                   \
        _rc = (exp);                       \
    } while (_rc == -1 && errno == EINTR); \
    _rc; })
#endif
     65 
     66 using namespace android;
     67 
     68 /*
     69  * Zip file constants.
     70  */
     71 #define kEOCDSignature      0x06054b50
     72 #define kEOCDLen            22
     73 #define kEOCDNumEntries     8               // offset to #of entries in file
     74 #define kEOCDSize           12              // size of the central directory
     75 #define kEOCDFileOffset     16              // offset to central directory
     76 
     77 #define kMaxCommentLen      65535           // longest possible in ushort
     78 #define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)
     79 
     80 #define kLFHSignature       0x04034b50
     81 #define kLFHLen             30              // excluding variable-len fields
     82 #define kLFHNameLen         26              // offset to filename length
     83 #define kLFHExtraLen        28              // offset to extra length
     84 
     85 #define kCDESignature       0x02014b50
     86 #define kCDELen             46              // excluding variable-len fields
     87 #define kCDEMethod          10              // offset to compression method
     88 #define kCDEModWhen         12              // offset to modification timestamp
     89 #define kCDECRC             16              // offset to entry CRC
     90 #define kCDECompLen         20              // offset to compressed length
     91 #define kCDEUncompLen       24              // offset to uncompressed length
     92 #define kCDENameLen         28              // offset to filename length
     93 #define kCDEExtraLen        30              // offset to extra length
     94 #define kCDECommentLen      32              // offset to comment length
     95 #define kCDELocalOffset     42              // offset to local hdr
     96 
/*
 * The values we return for ZipEntryRO use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. to
 * findEntryByIndex().
 */
    103 #define kZipEntryAdj        10000
    104 
    105 ZipFileRO::~ZipFileRO() {
    106     free(mHashTable);
    107     if (mDirectoryMap)
    108         mDirectoryMap->release();
    109     if (mFd >= 0)
    110         TEMP_FAILURE_RETRY(close(mFd));
    111     if (mFileName)
    112         free(mFileName);
    113 }
    114 
    115 /*
    116  * Convert a ZipEntryRO to a hash table index, verifying that it's in a
    117  * valid range.
    118  */
    119 int ZipFileRO::entryToIndex(const ZipEntryRO entry) const
    120 {
    121     long ent = ((intptr_t) entry) - kZipEntryAdj;
    122     if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) {
    123         ALOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent);
    124         return -1;
    125     }
    126     return ent;
    127 }
    128 
    129 
    130 /*
    131  * Open the specified file read-only.  We memory-map the entire thing and
    132  * close the file before returning.
    133  */
    134 status_t ZipFileRO::open(const char* zipFileName)
    135 {
    136     int fd = -1;
    137 
    138     assert(mDirectoryMap == NULL);
    139 
    140     /*
    141      * Open and map the specified file.
    142      */
    143     fd = TEMP_FAILURE_RETRY(::open(zipFileName, O_RDONLY | O_BINARY));
    144     if (fd < 0) {
    145         ALOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno));
    146         return NAME_NOT_FOUND;
    147     }
    148 
    149     mFileLength = lseek64(fd, 0, SEEK_END);
    150     if (mFileLength < kEOCDLen) {
    151         TEMP_FAILURE_RETRY(close(fd));
    152         return UNKNOWN_ERROR;
    153     }
    154 
    155     if (mFileName != NULL) {
    156         free(mFileName);
    157     }
    158     mFileName = strdup(zipFileName);
    159 
    160     mFd = fd;
    161 
    162     /*
    163      * Find the Central Directory and store its size and number of entries.
    164      */
    165     if (!mapCentralDirectory()) {
    166         goto bail;
    167     }
    168 
    169     /*
    170      * Verify Central Directory and create data structures for fast access.
    171      */
    172     if (!parseZipArchive()) {
    173         goto bail;
    174     }
    175 
    176     return OK;
    177 
    178 bail:
    179     free(mFileName);
    180     mFileName = NULL;
    181     TEMP_FAILURE_RETRY(close(fd));
    182     return UNKNOWN_ERROR;
    183 }
    184 
    185 /*
    186  * Parse the Zip archive, verifying its contents and initializing internal
    187  * data structures.
    188  */
    189 bool ZipFileRO::mapCentralDirectory(void)
    190 {
    191     ssize_t readAmount = kMaxEOCDSearch;
    192     if (readAmount > (ssize_t) mFileLength)
    193         readAmount = mFileLength;
    194 
    195     unsigned char* scanBuf = (unsigned char*) malloc(readAmount);
    196     if (scanBuf == NULL) {
    197         ALOGW("couldn't allocate scanBuf: %s", strerror(errno));
    198         free(scanBuf);
    199         return false;
    200     }
    201 
    202     /*
    203      * Make sure this is a Zip archive.
    204      */
    205     if (lseek64(mFd, 0, SEEK_SET) != 0) {
    206         ALOGW("seek to start failed: %s", strerror(errno));
    207         free(scanBuf);
    208         return false;
    209     }
    210 
    211     ssize_t actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, sizeof(int32_t)));
    212     if (actual != (ssize_t) sizeof(int32_t)) {
    213         ALOGI("couldn't read first signature from zip archive: %s", strerror(errno));
    214         free(scanBuf);
    215         return false;
    216     }
    217 
    218     {
    219         unsigned int header = get4LE(scanBuf);
    220         if (header == kEOCDSignature) {
    221             ALOGI("Found Zip archive, but it looks empty\n");
    222             free(scanBuf);
    223             return false;
    224         } else if (header != kLFHSignature) {
    225             ALOGV("Not a Zip archive (found 0x%08x)\n", header);
    226             free(scanBuf);
    227             return false;
    228         }
    229     }
    230 
    231     /*
    232      * Perform the traditional EOCD snipe hunt.
    233      *
    234      * We're searching for the End of Central Directory magic number,
    235      * which appears at the start of the EOCD block.  It's followed by
    236      * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
    237      * need to read the last part of the file into a buffer, dig through
    238      * it to find the magic number, parse some values out, and use those
    239      * to determine the extent of the CD.
    240      *
    241      * We start by pulling in the last part of the file.
    242      */
    243     off64_t searchStart = mFileLength - readAmount;
    244 
    245     if (lseek64(mFd, searchStart, SEEK_SET) != searchStart) {
    246         ALOGW("seek %ld failed: %s\n",  (long) searchStart, strerror(errno));
    247         free(scanBuf);
    248         return false;
    249     }
    250     actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, readAmount));
    251     if (actual != (ssize_t) readAmount) {
    252         ALOGW("Zip: read " ZD ", expected " ZD ". Failed: %s\n",
    253             (ZD_TYPE) actual, (ZD_TYPE) readAmount, strerror(errno));
    254         free(scanBuf);
    255         return false;
    256     }
    257 
    258     /*
    259      * Scan backward for the EOCD magic.  In an archive without a trailing
    260      * comment, we'll find it on the first try.  (We may want to consider
    261      * doing an initial minimal read; if we don't find it, retry with a
    262      * second read as above.)
    263      */
    264     int i;
    265     for (i = readAmount - kEOCDLen; i >= 0; i--) {
    266         if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
    267             ALOGV("+++ Found EOCD at buf+%d\n", i);
    268             break;
    269         }
    270     }
    271     if (i < 0) {
    272         ALOGD("Zip: EOCD not found, %s is not zip\n", mFileName);
    273         free(scanBuf);
    274         return false;
    275     }
    276 
    277     off64_t eocdOffset = searchStart + i;
    278     const unsigned char* eocdPtr = scanBuf + i;
    279 
    280     assert(eocdOffset < mFileLength);
    281 
    282     /*
    283      * Grab the CD offset and size, and the number of entries in the
    284      * archive. After that, we can release our EOCD hunt buffer.
    285      */
    286     unsigned int numEntries = get2LE(eocdPtr + kEOCDNumEntries);
    287     unsigned int dirSize = get4LE(eocdPtr + kEOCDSize);
    288     unsigned int dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
    289     free(scanBuf);
    290 
    291     // Verify that they look reasonable.
    292     if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
    293         ALOGW("bad offsets (dir %ld, size %u, eocd %ld)\n",
    294             (long) dirOffset, dirSize, (long) eocdOffset);
    295         return false;
    296     }
    297     if (numEntries == 0) {
    298         ALOGW("empty archive?\n");
    299         return false;
    300     }
    301 
    302     ALOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
    303         numEntries, dirSize, dirOffset);
    304 
    305     mDirectoryMap = new FileMap();
    306     if (mDirectoryMap == NULL) {
    307         ALOGW("Unable to create directory map: %s", strerror(errno));
    308         return false;
    309     }
    310 
    311     if (!mDirectoryMap->create(mFileName, mFd, dirOffset, dirSize, true)) {
    312         ALOGW("Unable to map '%s' (" ZD " to " ZD "): %s\n", mFileName,
    313                 (ZD_TYPE) dirOffset, (ZD_TYPE) (dirOffset + dirSize), strerror(errno));
    314         return false;
    315     }
    316 
    317     mNumEntries = numEntries;
    318     mDirectoryOffset = dirOffset;
    319 
    320     return true;
    321 }
    322 
    323 
    324 /*
    325  * Round up to the next highest power of 2.
    326  *
    327  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    328  */
    329 static unsigned int roundUpPower2(unsigned int val)
    330 {
    331     val--;
    332     val |= val >> 1;
    333     val |= val >> 2;
    334     val |= val >> 4;
    335     val |= val >> 8;
    336     val |= val >> 16;
    337     val++;
    338 
    339     return val;
    340 }
    341 
    342 bool ZipFileRO::parseZipArchive(void)
    343 {
    344     bool result = false;
    345     const unsigned char* cdPtr = (const unsigned char*) mDirectoryMap->getDataPtr();
    346     size_t cdLength = mDirectoryMap->getDataLength();
    347     int numEntries = mNumEntries;
    348 
    349     /*
    350      * Create hash table.  We have a minimum 75% load factor, possibly as
    351      * low as 50% after we round off to a power of 2.
    352      */
    353     mHashTableSize = roundUpPower2(1 + (numEntries * 4) / 3);
    354     mHashTable = (HashEntry*) calloc(mHashTableSize, sizeof(HashEntry));
    355 
    356     /*
    357      * Walk through the central directory, adding entries to the hash
    358      * table.
    359      */
    360     const unsigned char* ptr = cdPtr;
    361     for (int i = 0; i < numEntries; i++) {
    362         if (get4LE(ptr) != kCDESignature) {
    363             ALOGW("Missed a central dir sig (at %d)\n", i);
    364             goto bail;
    365         }
    366         if (ptr + kCDELen > cdPtr + cdLength) {
    367             ALOGW("Ran off the end (at %d)\n", i);
    368             goto bail;
    369         }
    370 
    371         long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
    372         if (localHdrOffset >= mDirectoryOffset) {
    373             ALOGW("bad LFH offset %ld at entry %d\n", localHdrOffset, i);
    374             goto bail;
    375         }
    376 
    377         unsigned int fileNameLen, extraLen, commentLen, hash;
    378 
    379         fileNameLen = get2LE(ptr + kCDENameLen);
    380         extraLen = get2LE(ptr + kCDEExtraLen);
    381         commentLen = get2LE(ptr + kCDECommentLen);
    382 
    383         /* add the CDE filename to the hash table */
    384         hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
    385         addToHash((const char*)ptr + kCDELen, fileNameLen, hash);
    386 
    387         ptr += kCDELen + fileNameLen + extraLen + commentLen;
    388         if ((size_t)(ptr - cdPtr) > cdLength) {
    389             ALOGW("bad CD advance (%d vs " ZD ") at entry %d\n",
    390                 (int) (ptr - cdPtr), (ZD_TYPE) cdLength, i);
    391             goto bail;
    392         }
    393     }
    394     ALOGV("+++ zip good scan %d entries\n", numEntries);
    395     result = true;
    396 
    397 bail:
    398     return result;
    399 }
    400 
    401 /*
    402  * Simple string hash function for non-null-terminated strings.
    403  */
    404 /*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len)
    405 {
    406     unsigned int hash = 0;
    407 
    408     while (len--)
    409         hash = hash * 31 + *str++;
    410 
    411     return hash;
    412 }
    413 
    414 /*
    415  * Add a new entry to the hash table.
    416  */
    417 void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash)
    418 {
    419     int ent = hash & (mHashTableSize-1);
    420 
    421     /*
    422      * We over-allocate the table, so we're guaranteed to find an empty slot.
    423      */
    424     while (mHashTable[ent].name != NULL)
    425         ent = (ent + 1) & (mHashTableSize-1);
    426 
    427     mHashTable[ent].name = str;
    428     mHashTable[ent].nameLen = strLen;
    429 }
    430 
    431 /*
    432  * Find a matching entry.
    433  *
    434  * Returns NULL if not found.
    435  */
    436 ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const
    437 {
    438     /*
    439      * If the ZipFileRO instance is not initialized, the entry number will
    440      * end up being garbage since mHashTableSize is -1.
    441      */
    442     if (mHashTableSize <= 0) {
    443         return NULL;
    444     }
    445 
    446     int nameLen = strlen(fileName);
    447     unsigned int hash = computeHash(fileName, nameLen);
    448     int ent = hash & (mHashTableSize-1);
    449 
    450     while (mHashTable[ent].name != NULL) {
    451         if (mHashTable[ent].nameLen == nameLen &&
    452             memcmp(mHashTable[ent].name, fileName, nameLen) == 0)
    453         {
    454             /* match */
    455             return (ZipEntryRO)(long)(ent + kZipEntryAdj);
    456         }
    457 
    458         ent = (ent + 1) & (mHashTableSize-1);
    459     }
    460 
    461     return NULL;
    462 }
    463 
    464 /*
    465  * Find the Nth entry.
    466  *
    467  * This currently involves walking through the sparse hash table, counting
    468  * non-empty entries.  If we need to speed this up we can either allocate
    469  * a parallel lookup table or (perhaps better) provide an iterator interface.
    470  */
    471 ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const
    472 {
    473     if (idx < 0 || idx >= mNumEntries) {
    474         ALOGW("Invalid index %d\n", idx);
    475         return NULL;
    476     }
    477 
    478     for (int ent = 0; ent < mHashTableSize; ent++) {
    479         if (mHashTable[ent].name != NULL) {
    480             if (idx-- == 0)
    481                 return (ZipEntryRO) (intptr_t)(ent + kZipEntryAdj);
    482         }
    483     }
    484 
    485     return NULL;
    486 }
    487 
    488 /*
    489  * Get the useful fields from the zip entry.
    490  *
    491  * Returns "false" if the offsets to the fields or the contents of the fields
    492  * appear to be bogus.
    493  */
    494 bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
    495     size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const
    496 {
    497     bool ret = false;
    498 
    499     const int ent = entryToIndex(entry);
    500     if (ent < 0)
    501         return false;
    502 
    503     HashEntry hashEntry = mHashTable[ent];
    504 
    505     /*
    506      * Recover the start of the central directory entry from the filename
    507      * pointer.  The filename is the first entry past the fixed-size data,
    508      * so we can just subtract back from that.
    509      */
    510     const unsigned char* ptr = (const unsigned char*) hashEntry.name;
    511     off64_t cdOffset = mDirectoryOffset;
    512 
    513     ptr -= kCDELen;
    514 
    515     int method = get2LE(ptr + kCDEMethod);
    516     if (pMethod != NULL)
    517         *pMethod = method;
    518 
    519     if (pModWhen != NULL)
    520         *pModWhen = get4LE(ptr + kCDEModWhen);
    521     if (pCrc32 != NULL)
    522         *pCrc32 = get4LE(ptr + kCDECRC);
    523 
    524     size_t compLen = get4LE(ptr + kCDECompLen);
    525     if (pCompLen != NULL)
    526         *pCompLen = compLen;
    527     size_t uncompLen = get4LE(ptr + kCDEUncompLen);
    528     if (pUncompLen != NULL)
    529         *pUncompLen = uncompLen;
    530 
    531     /*
    532      * If requested, determine the offset of the start of the data.  All we
    533      * have is the offset to the Local File Header, which is variable size,
    534      * so we have to read the contents of the struct to figure out where
    535      * the actual data starts.
    536      *
    537      * We also need to make sure that the lengths are not so large that
    538      * somebody trying to map the compressed or uncompressed data runs
    539      * off the end of the mapped region.
    540      *
    541      * Note we don't verify compLen/uncompLen if they don't request the
    542      * dataOffset, because dataOffset is expensive to determine.  However,
    543      * if they don't have the file offset, they're not likely to be doing
    544      * anything with the contents.
    545      */
    546     if (pOffset != NULL) {
    547         long localHdrOffset = get4LE(ptr + kCDELocalOffset);
    548         if (localHdrOffset + kLFHLen >= cdOffset) {
    549             ALOGE("ERROR: bad local hdr offset in zip\n");
    550             return false;
    551         }
    552 
    553         unsigned char lfhBuf[kLFHLen];
    554 
    555 #ifdef HAVE_PREAD
    556         /*
    557          * This file descriptor might be from zygote's preloaded assets,
    558          * so we need to do an pread64() instead of a lseek64() + read() to
    559          * guarantee atomicity across the processes with the shared file
    560          * descriptors.
    561          */
    562         ssize_t actual =
    563                 TEMP_FAILURE_RETRY(pread64(mFd, lfhBuf, sizeof(lfhBuf), localHdrOffset));
    564 
    565         if (actual != sizeof(lfhBuf)) {
    566             ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
    567             return false;
    568         }
    569 
    570         if (get4LE(lfhBuf) != kLFHSignature) {
    571             ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
    572                     "got: data=0x%08lx\n",
    573                     localHdrOffset, kLFHSignature, get4LE(lfhBuf));
    574             return false;
    575         }
    576 #else /* HAVE_PREAD */
    577         /*
    578          * For hosts don't have pread64() we cannot guarantee atomic reads from
    579          * an offset in a file. Android should never run on those platforms.
    580          * File descriptors inherited from a fork() share file offsets and
    581          * there would be nothing to protect from two different processes
    582          * calling lseek64() concurrently.
    583          */
    584 
    585         {
    586             AutoMutex _l(mFdLock);
    587 
    588             if (lseek64(mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
    589                 ALOGW("failed seeking to lfh at offset %ld\n", localHdrOffset);
    590                 return false;
    591             }
    592 
    593             ssize_t actual =
    594                     TEMP_FAILURE_RETRY(read(mFd, lfhBuf, sizeof(lfhBuf)));
    595             if (actual != sizeof(lfhBuf)) {
    596                 ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
    597                 return false;
    598             }
    599 
    600             if (get4LE(lfhBuf) != kLFHSignature) {
    601                 off64_t actualOffset = lseek64(mFd, 0, SEEK_CUR);
    602                 ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
    603                         "got: offset=" ZD " data=0x%08lx\n",
    604                         localHdrOffset, kLFHSignature, (ZD_TYPE) actualOffset, get4LE(lfhBuf));
    605                 return false;
    606             }
    607         }
    608 #endif /* HAVE_PREAD */
    609 
    610         off64_t dataOffset = localHdrOffset + kLFHLen
    611             + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
    612         if (dataOffset >= cdOffset) {
    613             ALOGW("bad data offset %ld in zip\n", (long) dataOffset);
    614             return false;
    615         }
    616 
    617         /* check lengths */
    618         if ((off64_t)(dataOffset + compLen) > cdOffset) {
    619             ALOGW("bad compressed length in zip (%ld + " ZD " > %ld)\n",
    620                 (long) dataOffset, (ZD_TYPE) compLen, (long) cdOffset);
    621             return false;
    622         }
    623 
    624         if (method == kCompressStored &&
    625             (off64_t)(dataOffset + uncompLen) > cdOffset)
    626         {
    627             ALOGE("ERROR: bad uncompressed length in zip (%ld + " ZD " > %ld)\n",
    628                 (long) dataOffset, (ZD_TYPE) uncompLen, (long) cdOffset);
    629             return false;
    630         }
    631 
    632         *pOffset = dataOffset;
    633     }
    634 
    635     return true;
    636 }
    637 
    638 /*
    639  * Copy the entry's filename to the buffer.
    640  */
    641 int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen)
    642     const
    643 {
    644     int ent = entryToIndex(entry);
    645     if (ent < 0)
    646         return -1;
    647 
    648     int nameLen = mHashTable[ent].nameLen;
    649     if (bufLen < nameLen+1)
    650         return nameLen+1;
    651 
    652     memcpy(buffer, mHashTable[ent].name, nameLen);
    653     buffer[nameLen] = '\0';
    654     return 0;
    655 }
    656 
    657 /*
    658  * Create a new FileMap object that spans the data in "entry".
    659  */
    660 FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const
    661 {
    662     /*
    663      * TODO: the efficient way to do this is to modify FileMap to allow
    664      * sub-regions of a file to be mapped.  A reference-counting scheme
    665      * can manage the base memory mapping.  For now, we just create a brand
    666      * new mapping off of the Zip archive file descriptor.
    667      */
    668 
    669     FileMap* newMap;
    670     size_t compLen;
    671     off64_t offset;
    672 
    673     if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL))
    674         return NULL;
    675 
    676     newMap = new FileMap();
    677     if (!newMap->create(mFileName, mFd, offset, compLen, true)) {
    678         newMap->release();
    679         return NULL;
    680     }
    681 
    682     return newMap;
    683 }
    684 
    685 /*
    686  * Uncompress an entry, in its entirety, into the provided output buffer.
    687  *
    688  * This doesn't verify the data's CRC, which might be useful for
    689  * uncompressed data.  The caller should be able to manage it.
    690  */
    691 bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const
    692 {
    693     const size_t kSequentialMin = 32768;
    694     bool result = false;
    695     int ent = entryToIndex(entry);
    696     if (ent < 0)
    697         return -1;
    698 
    699     int method;
    700     size_t uncompLen, compLen;
    701     off64_t offset;
    702     const unsigned char* ptr;
    703 
    704     getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
    705 
    706     FileMap* file = createEntryFileMap(entry);
    707     if (file == NULL) {
    708         goto bail;
    709     }
    710 
    711     ptr = (const unsigned char*) file->getDataPtr();
    712 
    713     /*
    714      * Experiment with madvise hint.  When we want to uncompress a file,
    715      * we pull some stuff out of the central dir entry and then hit a
    716      * bunch of compressed or uncompressed data sequentially.  The CDE
    717      * visit will cause a limited amount of read-ahead because it's at
    718      * the end of the file.  We could end up doing lots of extra disk
    719      * access if the file we're prying open is small.  Bottom line is we
    720      * probably don't want to turn MADV_SEQUENTIAL on and leave it on.
    721      *
    722      * So, if the compressed size of the file is above a certain minimum
    723      * size, temporarily boost the read-ahead in the hope that the extra
    724      * pair of system calls are negated by a reduction in page faults.
    725      */
    726     if (compLen > kSequentialMin)
    727         file->advise(FileMap::SEQUENTIAL);
    728 
    729     if (method == kCompressStored) {
    730         memcpy(buffer, ptr, uncompLen);
    731     } else {
    732         if (!inflateBuffer(buffer, ptr, uncompLen, compLen))
    733             goto unmap;
    734     }
    735 
    736     if (compLen > kSequentialMin)
    737         file->advise(FileMap::NORMAL);
    738 
    739     result = true;
    740 
    741 unmap:
    742     file->release();
    743 bail:
    744     return result;
    745 }
    746 
    747 /*
    748  * Uncompress an entry, in its entirety, to an open file descriptor.
    749  *
    750  * This doesn't verify the data's CRC, but probably should.
    751  */
    752 bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const
    753 {
    754     bool result = false;
    755     int ent = entryToIndex(entry);
    756     if (ent < 0)
    757         return -1;
    758 
    759     int method;
    760     size_t uncompLen, compLen;
    761     off64_t offset;
    762     const unsigned char* ptr;
    763 
    764     getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
    765 
    766     FileMap* file = createEntryFileMap(entry);
    767     if (file == NULL) {
    768         goto bail;
    769     }
    770 
    771     ptr = (const unsigned char*) file->getDataPtr();
    772 
    773     if (method == kCompressStored) {
    774         ssize_t actual = TEMP_FAILURE_RETRY(write(fd, ptr, uncompLen));
    775         if (actual < 0) {
    776             ALOGE("Write failed: %s\n", strerror(errno));
    777             goto unmap;
    778         } else if ((size_t) actual != uncompLen) {
    779             ALOGE("Partial write during uncompress (" ZD " of " ZD ")\n",
    780                 (ZD_TYPE) actual, (ZD_TYPE) uncompLen);
    781             goto unmap;
    782         } else {
    783             ALOGI("+++ successful write\n");
    784         }
    785     } else {
    786         if (!inflateBuffer(fd, ptr, uncompLen, compLen))
    787             goto unmap;
    788     }
    789 
    790     result = true;
    791 
    792 unmap:
    793     file->release();
    794 bail:
    795     return result;
    796 }
    797 
    798 /*
    799  * Uncompress "deflate" data from one buffer to another.
    800  */
    801 /*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf,
    802     size_t uncompLen, size_t compLen)
    803 {
    804     bool result = false;
    805     z_stream zstream;
    806     int zerr;
    807 
    808     /*
    809      * Initialize the zlib stream struct.
    810      */
    811     memset(&zstream, 0, sizeof(zstream));
    812     zstream.zalloc = Z_NULL;
    813     zstream.zfree = Z_NULL;
    814     zstream.opaque = Z_NULL;
    815     zstream.next_in = (Bytef*)inBuf;
    816     zstream.avail_in = compLen;
    817     zstream.next_out = (Bytef*) outBuf;
    818     zstream.avail_out = uncompLen;
    819     zstream.data_type = Z_UNKNOWN;
    820 
    821     /*
    822      * Use the undocumented "negative window bits" feature to tell zlib
    823      * that there's no zlib header waiting for it.
    824      */
    825     zerr = inflateInit2(&zstream, -MAX_WBITS);
    826     if (zerr != Z_OK) {
    827         if (zerr == Z_VERSION_ERROR) {
    828             ALOGE("Installed zlib is not compatible with linked version (%s)\n",
    829                 ZLIB_VERSION);
    830         } else {
    831             ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
    832         }
    833         goto bail;
    834     }
    835 
    836     /*
    837      * Expand data.
    838      */
    839     zerr = inflate(&zstream, Z_FINISH);
    840     if (zerr != Z_STREAM_END) {
    841         ALOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
    842             zerr, zstream.next_in, zstream.avail_in,
    843             zstream.next_out, zstream.avail_out);
    844         goto z_bail;
    845     }
    846 
    847     /* paranoia */
    848     if (zstream.total_out != uncompLen) {
    849         ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
    850             zstream.total_out, (ZD_TYPE) uncompLen);
    851         goto z_bail;
    852     }
    853 
    854     result = true;
    855 
    856 z_bail:
    857     inflateEnd(&zstream);        /* free up any allocated structures */
    858 
    859 bail:
    860     return result;
    861 }
    862 
    863 /*
    864  * Uncompress "deflate" data from one buffer to an open file descriptor.
    865  */
    866 /*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf,
    867     size_t uncompLen, size_t compLen)
    868 {
    869     bool result = false;
    870     const size_t kWriteBufSize = 32768;
    871     unsigned char writeBuf[kWriteBufSize];
    872     z_stream zstream;
    873     int zerr;
    874 
    875     /*
    876      * Initialize the zlib stream struct.
    877      */
    878     memset(&zstream, 0, sizeof(zstream));
    879     zstream.zalloc = Z_NULL;
    880     zstream.zfree = Z_NULL;
    881     zstream.opaque = Z_NULL;
    882     zstream.next_in = (Bytef*)inBuf;
    883     zstream.avail_in = compLen;
    884     zstream.next_out = (Bytef*) writeBuf;
    885     zstream.avail_out = sizeof(writeBuf);
    886     zstream.data_type = Z_UNKNOWN;
    887 
    888     /*
    889      * Use the undocumented "negative window bits" feature to tell zlib
    890      * that there's no zlib header waiting for it.
    891      */
    892     zerr = inflateInit2(&zstream, -MAX_WBITS);
    893     if (zerr != Z_OK) {
    894         if (zerr == Z_VERSION_ERROR) {
    895             ALOGE("Installed zlib is not compatible with linked version (%s)\n",
    896                 ZLIB_VERSION);
    897         } else {
    898             ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
    899         }
    900         goto bail;
    901     }
    902 
    903     /*
    904      * Loop while we have more to do.
    905      */
    906     do {
    907         /*
    908          * Expand data.
    909          */
    910         zerr = inflate(&zstream, Z_NO_FLUSH);
    911         if (zerr != Z_OK && zerr != Z_STREAM_END) {
    912             ALOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
    913                 zerr, zstream.next_in, zstream.avail_in,
    914                 zstream.next_out, zstream.avail_out);
    915             goto z_bail;
    916         }
    917 
    918         /* write when we're full or when we're done */
    919         if (zstream.avail_out == 0 ||
    920             (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
    921         {
    922             long writeSize = zstream.next_out - writeBuf;
    923             int cc = TEMP_FAILURE_RETRY(write(fd, writeBuf, writeSize));
    924             if (cc < 0) {
    925                 ALOGW("write failed in inflate: %s", strerror(errno));
    926                 goto z_bail;
    927             } else if (cc != (int) writeSize) {
    928                 ALOGW("write failed in inflate (%d vs %ld)", cc, writeSize);
    929                 goto z_bail;
    930             }
    931 
    932             zstream.next_out = writeBuf;
    933             zstream.avail_out = sizeof(writeBuf);
    934         }
    935     } while (zerr == Z_OK);
    936 
    937     assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
    938 
    939     /* paranoia */
    940     if (zstream.total_out != uncompLen) {
    941         ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
    942             zstream.total_out, (ZD_TYPE) uncompLen);
    943         goto z_bail;
    944     }
    945 
    946     result = true;
    947 
    948 z_bail:
    949     inflateEnd(&zstream);        /* free up any allocated structures */
    950 
    951 bail:
    952     return result;
    953 }
    954