Home | History | Annotate | Download | only in zipalign
      1 /*
      2  * Copyright (C) 2006 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 //
     18 // Access to Zip archives.
     19 //
     20 
     21 #define LOG_TAG "zip"
     22 
     23 #include <utils/ZipUtils.h>
     24 #include <utils/Log.h>
     25 
     26 #include "ZipFile.h"
     27 
     28 #include <zlib.h>
     29 #define DEF_MEM_LEVEL 8                // normally in zutil.h?
     30 
     31 #include <memory.h>
     32 #include <sys/stat.h>
     33 #include <errno.h>
     34 #include <assert.h>
     35 
     36 using namespace android;
     37 
     38 /*
     39  * Some environments require the "b", some choke on it.
     40  */
     41 #define FILE_OPEN_RO        "rb"
     42 #define FILE_OPEN_RW        "r+b"
     43 #define FILE_OPEN_RW_CREATE "w+b"
     44 
     45 /* should live somewhere else? */
     46 static status_t errnoToStatus(int err)
     47 {
     48     if (err == ENOENT)
     49         return NAME_NOT_FOUND;
     50     else if (err == EACCES)
     51         return PERMISSION_DENIED;
     52     else
     53         return UNKNOWN_ERROR;
     54 }
     55 
     56 /*
     57  * Open a file and parse its guts.
     58  */
     59 status_t ZipFile::open(const char* zipFileName, int flags)
     60 {
     61     bool newArchive = false;
     62 
     63     assert(mZipFp == NULL);     // no reopen
     64 
     65     if ((flags & kOpenTruncate))
     66         flags |= kOpenCreate;           // trunc implies create
     67 
     68     if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite))
     69         return INVALID_OPERATION;       // not both
     70     if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite)))
     71         return INVALID_OPERATION;       // not neither
     72     if ((flags & kOpenCreate) && !(flags & kOpenReadWrite))
     73         return INVALID_OPERATION;       // create requires write
     74 
     75     if (flags & kOpenTruncate) {
     76         newArchive = true;
     77     } else {
     78         newArchive = (access(zipFileName, F_OK) != 0);
     79         if (!(flags & kOpenCreate) && newArchive) {
     80             /* not creating, must already exist */
     81             LOGD("File %s does not exist", zipFileName);
     82             return NAME_NOT_FOUND;
     83         }
     84     }
     85 
     86     /* open the file */
     87     const char* openflags;
     88     if (flags & kOpenReadWrite) {
     89         if (newArchive)
     90             openflags = FILE_OPEN_RW_CREATE;
     91         else
     92             openflags = FILE_OPEN_RW;
     93     } else {
     94         openflags = FILE_OPEN_RO;
     95     }
     96     mZipFp = fopen(zipFileName, openflags);
     97     if (mZipFp == NULL) {
     98         int err = errno;
     99         LOGD("fopen failed: %d\n", err);
    100         return errnoToStatus(err);
    101     }
    102 
    103     status_t result;
    104     if (!newArchive) {
    105         /*
    106          * Load the central directory.  If that fails, then this probably
    107          * isn't a Zip archive.
    108          */
    109         result = readCentralDir();
    110     } else {
    111         /*
    112          * Newly-created.  The EndOfCentralDir constructor actually
    113          * sets everything to be the way we want it (all zeroes).  We
    114          * set mNeedCDRewrite so that we create *something* if the
    115          * caller doesn't add any files.  (We could also just unlink
    116          * the file if it's brand new and nothing was added, but that's
    117          * probably doing more than we really should -- the user might
    118          * have a need for empty zip files.)
    119          */
    120         mNeedCDRewrite = true;
    121         result = NO_ERROR;
    122     }
    123 
    124     if (flags & kOpenReadOnly)
    125         mReadOnly = true;
    126     else
    127         assert(!mReadOnly);
    128 
    129     return result;
    130 }
    131 
    132 /*
    133  * Return the Nth entry in the archive.
    134  */
    135 ZipEntry* ZipFile::getEntryByIndex(int idx) const
    136 {
    137     if (idx < 0 || idx >= (int) mEntries.size())
    138         return NULL;
    139 
    140     return mEntries[idx];
    141 }
    142 
    143 /*
    144  * Find an entry by name.
    145  */
    146 ZipEntry* ZipFile::getEntryByName(const char* fileName) const
    147 {
    148     /*
    149      * Do a stupid linear string-compare search.
    150      *
    151      * There are various ways to speed this up, especially since it's rare
    152      * to intermingle changes to the archive with "get by name" calls.  We
    153      * don't want to sort the mEntries vector itself, however, because
    154      * it's used to recreate the Central Directory.
    155      *
    156      * (Hash table works, parallel list of pointers in sorted order is good.)
    157      */
    158     int idx;
    159 
    160     for (idx = mEntries.size()-1; idx >= 0; idx--) {
    161         ZipEntry* pEntry = mEntries[idx];
    162         if (!pEntry->getDeleted() &&
    163             strcmp(fileName, pEntry->getFileName()) == 0)
    164         {
    165             return pEntry;
    166         }
    167     }
    168 
    169     return NULL;
    170 }
    171 
    172 /*
    173  * Empty the mEntries vector.
    174  */
    175 void ZipFile::discardEntries(void)
    176 {
    177     int count = mEntries.size();
    178 
    179     while (--count >= 0)
    180         delete mEntries[count];
    181 
    182     mEntries.clear();
    183 }
    184 
    185 
    186 /*
    187  * Find the central directory and read the contents.
    188  *
    189  * The fun thing about ZIP archives is that they may or may not be
    190  * readable from start to end.  In some cases, notably for archives
    191  * that were written to stdout, the only length information is in the
    192  * central directory at the end of the file.
    193  *
    194  * Of course, the central directory can be followed by a variable-length
    195  * comment field, so we have to scan through it backwards.  The comment
    196  * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
    197  * itself, plus apparently sometimes people throw random junk on the end
    198  * just for the fun of it.
    199  *
    200  * This is all a little wobbly.  If the wrong value ends up in the EOCD
    201  * area, we're hosed.  This appears to be the way that everbody handles
    202  * it though, so we're in pretty good company if this fails.
    203  */
    204 status_t ZipFile::readCentralDir(void)
    205 {
    206     status_t result = NO_ERROR;
    207     unsigned char* buf = NULL;
    208     off_t fileLength, seekStart;
    209     long readAmount;
    210     int i;
    211 
    212     fseek(mZipFp, 0, SEEK_END);
    213     fileLength = ftell(mZipFp);
    214     rewind(mZipFp);
    215 
    216     /* too small to be a ZIP archive? */
    217     if (fileLength < EndOfCentralDir::kEOCDLen) {
    218         LOGD("Length is %ld -- too small\n", (long)fileLength);
    219         result = INVALID_OPERATION;
    220         goto bail;
    221     }
    222 
    223     buf = new unsigned char[EndOfCentralDir::kMaxEOCDSearch];
    224     if (buf == NULL) {
    225         LOGD("Failure allocating %d bytes for EOCD search",
    226              EndOfCentralDir::kMaxEOCDSearch);
    227         result = NO_MEMORY;
    228         goto bail;
    229     }
    230 
    231     if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
    232         seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
    233         readAmount = EndOfCentralDir::kMaxEOCDSearch;
    234     } else {
    235         seekStart = 0;
    236         readAmount = (long) fileLength;
    237     }
    238     if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
    239         LOGD("Failure seeking to end of zip at %ld", (long) seekStart);
    240         result = UNKNOWN_ERROR;
    241         goto bail;
    242     }
    243 
    244     /* read the last part of the file into the buffer */
    245     if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
    246         LOGD("short file? wanted %ld\n", readAmount);
    247         result = UNKNOWN_ERROR;
    248         goto bail;
    249     }
    250 
    251     /* find the end-of-central-dir magic */
    252     for (i = readAmount - 4; i >= 0; i--) {
    253         if (buf[i] == 0x50 &&
    254             ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
    255         {
    256             LOGV("+++ Found EOCD at buf+%d\n", i);
    257             break;
    258         }
    259     }
    260     if (i < 0) {
    261         LOGD("EOCD not found, not Zip\n");
    262         result = INVALID_OPERATION;
    263         goto bail;
    264     }
    265 
    266     /* extract eocd values */
    267     result = mEOCD.readBuf(buf + i, readAmount - i);
    268     if (result != NO_ERROR) {
    269         LOGD("Failure reading %ld bytes of EOCD values", readAmount - i);
    270         goto bail;
    271     }
    272     //mEOCD.dump();
    273 
    274     if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 ||
    275         mEOCD.mNumEntries != mEOCD.mTotalNumEntries)
    276     {
    277         LOGD("Archive spanning not supported\n");
    278         result = INVALID_OPERATION;
    279         goto bail;
    280     }
    281 
    282     /*
    283      * So far so good.  "mCentralDirSize" is the size in bytes of the
    284      * central directory, so we can just seek back that far to find it.
    285      * We can also seek forward mCentralDirOffset bytes from the
    286      * start of the file.
    287      *
    288      * We're not guaranteed to have the rest of the central dir in the
    289      * buffer, nor are we guaranteed that the central dir will have any
    290      * sort of convenient size.  We need to skip to the start of it and
    291      * read the header, then the other goodies.
    292      *
    293      * The only thing we really need right now is the file comment, which
    294      * we're hoping to preserve.
    295      */
    296     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
    297         LOGD("Failure seeking to central dir offset %ld\n",
    298              mEOCD.mCentralDirOffset);
    299         result = UNKNOWN_ERROR;
    300         goto bail;
    301     }
    302 
    303     /*
    304      * Loop through and read the central dir entries.
    305      */
    306     LOGV("Scanning %d entries...\n", mEOCD.mTotalNumEntries);
    307     int entry;
    308     for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
    309         ZipEntry* pEntry = new ZipEntry;
    310 
    311         result = pEntry->initFromCDE(mZipFp);
    312         if (result != NO_ERROR) {
    313             LOGD("initFromCDE failed\n");
    314             delete pEntry;
    315             goto bail;
    316         }
    317 
    318         mEntries.add(pEntry);
    319     }
    320 
    321 
    322     /*
    323      * If all went well, we should now be back at the EOCD.
    324      */
    325     {
    326         unsigned char checkBuf[4];
    327         if (fread(checkBuf, 1, 4, mZipFp) != 4) {
    328             LOGD("EOCD check read failed\n");
    329             result = INVALID_OPERATION;
    330             goto bail;
    331         }
    332         if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
    333             LOGD("EOCD read check failed\n");
    334             result = UNKNOWN_ERROR;
    335             goto bail;
    336         }
    337         LOGV("+++ EOCD read check passed\n");
    338     }
    339 
    340 bail:
    341     delete[] buf;
    342     return result;
    343 }
    344 
    345 
    346 /*
    347  * Add a new file to the archive.
    348  *
    349  * This requires creating and populating a ZipEntry structure, and copying
    350  * the data into the file at the appropriate position.  The "appropriate
    351  * position" is the current location of the central directory, which we
    352  * casually overwrite (we can put it back later).
    353  *
    354  * If we were concerned about safety, we would want to make all changes
    355  * in a temp file and then overwrite the original after everything was
    356  * safely written.  Not really a concern for us.
    357  */
    358 status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size,
    359     const char* storageName, int sourceType, int compressionMethod,
    360     ZipEntry** ppEntry)
    361 {
    362     ZipEntry* pEntry = NULL;
    363     status_t result = NO_ERROR;
    364     long lfhPosn, startPosn, endPosn, uncompressedLen;
    365     FILE* inputFp = NULL;
    366     unsigned long crc;
    367     time_t modWhen;
    368 
    369     if (mReadOnly)
    370         return INVALID_OPERATION;
    371 
    372     assert(compressionMethod == ZipEntry::kCompressDeflated ||
    373            compressionMethod == ZipEntry::kCompressStored);
    374 
    375     /* make sure we're in a reasonable state */
    376     assert(mZipFp != NULL);
    377     assert(mEntries.size() == mEOCD.mTotalNumEntries);
    378 
    379     /* make sure it doesn't already exist */
    380     if (getEntryByName(storageName) != NULL)
    381         return ALREADY_EXISTS;
    382 
    383     if (!data) {
    384         inputFp = fopen(fileName, FILE_OPEN_RO);
    385         if (inputFp == NULL)
    386             return errnoToStatus(errno);
    387     }
    388 
    389     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
    390         result = UNKNOWN_ERROR;
    391         goto bail;
    392     }
    393 
    394     pEntry = new ZipEntry;
    395     pEntry->initNew(storageName, NULL);
    396 
    397     /*
    398      * From here on out, failures are more interesting.
    399      */
    400     mNeedCDRewrite = true;
    401 
    402     /*
    403      * Write the LFH, even though it's still mostly blank.  We need it
    404      * as a place-holder.  In theory the LFH isn't necessary, but in
    405      * practice some utilities demand it.
    406      */
    407     lfhPosn = ftell(mZipFp);
    408     pEntry->mLFH.write(mZipFp);
    409     startPosn = ftell(mZipFp);
    410 
    411     /*
    412      * Copy the data in, possibly compressing it as we go.
    413      */
    414     if (sourceType == ZipEntry::kCompressStored) {
    415         if (compressionMethod == ZipEntry::kCompressDeflated) {
    416             bool failed = false;
    417             result = compressFpToFp(mZipFp, inputFp, data, size, &crc);
    418             if (result != NO_ERROR) {
    419                 LOGD("compression failed, storing\n");
    420                 failed = true;
    421             } else {
    422                 /*
    423                  * Make sure it has compressed "enough".  This probably ought
    424                  * to be set through an API call, but I don't expect our
    425                  * criteria to change over time.
    426                  */
    427                 long src = inputFp ? ftell(inputFp) : size;
    428                 long dst = ftell(mZipFp) - startPosn;
    429                 if (dst + (dst / 10) > src) {
    430                     LOGD("insufficient compression (src=%ld dst=%ld), storing\n",
    431                         src, dst);
    432                     failed = true;
    433                 }
    434             }
    435 
    436             if (failed) {
    437                 compressionMethod = ZipEntry::kCompressStored;
    438                 if (inputFp) rewind(inputFp);
    439                 fseek(mZipFp, startPosn, SEEK_SET);
    440                 /* fall through to kCompressStored case */
    441             }
    442         }
    443         /* handle "no compression" request, or failed compression from above */
    444         if (compressionMethod == ZipEntry::kCompressStored) {
    445             if (inputFp) {
    446                 result = copyFpToFp(mZipFp, inputFp, &crc);
    447             } else {
    448                 result = copyDataToFp(mZipFp, data, size, &crc);
    449             }
    450             if (result != NO_ERROR) {
    451                 // don't need to truncate; happens in CDE rewrite
    452                 LOGD("failed copying data in\n");
    453                 goto bail;
    454             }
    455         }
    456 
    457         // currently seeked to end of file
    458         uncompressedLen = inputFp ? ftell(inputFp) : size;
    459     } else if (sourceType == ZipEntry::kCompressDeflated) {
    460         /* we should support uncompressed-from-compressed, but it's not
    461          * important right now */
    462         assert(compressionMethod == ZipEntry::kCompressDeflated);
    463 
    464         bool scanResult;
    465         int method;
    466         long compressedLen;
    467 
    468         scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen,
    469                         &compressedLen, &crc);
    470         if (!scanResult || method != ZipEntry::kCompressDeflated) {
    471             LOGD("this isn't a deflated gzip file?");
    472             result = UNKNOWN_ERROR;
    473             goto bail;
    474         }
    475 
    476         result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL);
    477         if (result != NO_ERROR) {
    478             LOGD("failed copying gzip data in\n");
    479             goto bail;
    480         }
    481     } else {
    482         assert(false);
    483         result = UNKNOWN_ERROR;
    484         goto bail;
    485     }
    486 
    487     /*
    488      * We could write the "Data Descriptor", but there doesn't seem to
    489      * be any point since we're going to go back and write the LFH.
    490      *
    491      * Update file offsets.
    492      */
    493     endPosn = ftell(mZipFp);            // seeked to end of compressed data
    494 
    495     /*
    496      * Success!  Fill out new values.
    497      */
    498     pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc,
    499         compressionMethod);
    500     modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp));
    501     pEntry->setModWhen(modWhen);
    502     pEntry->setLFHOffset(lfhPosn);
    503     mEOCD.mNumEntries++;
    504     mEOCD.mTotalNumEntries++;
    505     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
    506     mEOCD.mCentralDirOffset = endPosn;
    507 
    508     /*
    509      * Go back and write the LFH.
    510      */
    511     if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
    512         result = UNKNOWN_ERROR;
    513         goto bail;
    514     }
    515     pEntry->mLFH.write(mZipFp);
    516 
    517     /*
    518      * Add pEntry to the list.
    519      */
    520     mEntries.add(pEntry);
    521     if (ppEntry != NULL)
    522         *ppEntry = pEntry;
    523     pEntry = NULL;
    524 
    525 bail:
    526     if (inputFp != NULL)
    527         fclose(inputFp);
    528     delete pEntry;
    529     return result;
    530 }
    531 
    532 /*
    533  * Add an entry by copying it from another zip file.  If "padding" is
    534  * nonzero, the specified number of bytes will be added to the "extra"
    535  * field in the header.
    536  *
    537  * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
    538  */
    539 status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
    540     int padding, ZipEntry** ppEntry)
    541 {
    542     ZipEntry* pEntry = NULL;
    543     status_t result;
    544     long lfhPosn, endPosn;
    545 
    546     if (mReadOnly)
    547         return INVALID_OPERATION;
    548 
    549     /* make sure we're in a reasonable state */
    550     assert(mZipFp != NULL);
    551     assert(mEntries.size() == mEOCD.mTotalNumEntries);
    552 
    553     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
    554         result = UNKNOWN_ERROR;
    555         goto bail;
    556     }
    557 
    558     pEntry = new ZipEntry;
    559     if (pEntry == NULL) {
    560         result = NO_MEMORY;
    561         goto bail;
    562     }
    563 
    564     result = pEntry->initFromExternal(pSourceZip, pSourceEntry);
    565     if (result != NO_ERROR)
    566         goto bail;
    567     if (padding != 0) {
    568         result = pEntry->addPadding(padding);
    569         if (result != NO_ERROR)
    570             goto bail;
    571     }
    572 
    573     /*
    574      * From here on out, failures are more interesting.
    575      */
    576     mNeedCDRewrite = true;
    577 
    578     /*
    579      * Write the LFH.  Since we're not recompressing the data, we already
    580      * have all of the fields filled out.
    581      */
    582     lfhPosn = ftell(mZipFp);
    583     pEntry->mLFH.write(mZipFp);
    584 
    585     /*
    586      * Copy the data over.
    587      *
    588      * If the "has data descriptor" flag is set, we want to copy the DD
    589      * fields as well.  This is a fixed-size area immediately following
    590      * the data.
    591      */
    592     if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
    593     {
    594         result = UNKNOWN_ERROR;
    595         goto bail;
    596     }
    597 
    598     off_t copyLen;
    599     copyLen = pSourceEntry->getCompressedLen();
    600     if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
    601         copyLen += ZipEntry::kDataDescriptorLen;
    602 
    603     if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
    604         != NO_ERROR)
    605     {
    606         LOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
    607         result = UNKNOWN_ERROR;
    608         goto bail;
    609     }
    610 
    611     /*
    612      * Update file offsets.
    613      */
    614     endPosn = ftell(mZipFp);
    615 
    616     /*
    617      * Success!  Fill out new values.
    618      */
    619     pEntry->setLFHOffset(lfhPosn);      // sets mCDE.mLocalHeaderRelOffset
    620     mEOCD.mNumEntries++;
    621     mEOCD.mTotalNumEntries++;
    622     mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
    623     mEOCD.mCentralDirOffset = endPosn;
    624 
    625     /*
    626      * Add pEntry to the list.
    627      */
    628     mEntries.add(pEntry);
    629     if (ppEntry != NULL)
    630         *ppEntry = pEntry;
    631     pEntry = NULL;
    632 
    633     result = NO_ERROR;
    634 
    635 bail:
    636     delete pEntry;
    637     return result;
    638 }
    639 
    640 /*
    641  * Copy all of the bytes in "src" to "dst".
    642  *
    643  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
    644  * will be seeked immediately past the data.
    645  */
    646 status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, unsigned long* pCRC32)
    647 {
    648     unsigned char tmpBuf[32768];
    649     size_t count;
    650 
    651     *pCRC32 = crc32(0L, Z_NULL, 0);
    652 
    653     while (1) {
    654         count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp);
    655         if (ferror(srcFp) || ferror(dstFp))
    656             return errnoToStatus(errno);
    657         if (count == 0)
    658             break;
    659 
    660         *pCRC32 = crc32(*pCRC32, tmpBuf, count);
    661 
    662         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
    663             LOGD("fwrite %d bytes failed\n", (int) count);
    664             return UNKNOWN_ERROR;
    665         }
    666     }
    667 
    668     return NO_ERROR;
    669 }
    670 
    671 /*
    672  * Copy all of the bytes in "src" to "dst".
    673  *
    674  * On exit, "dstFp" will be seeked immediately past the data.
    675  */
    676 status_t ZipFile::copyDataToFp(FILE* dstFp,
    677     const void* data, size_t size, unsigned long* pCRC32)
    678 {
    679     size_t count;
    680 
    681     *pCRC32 = crc32(0L, Z_NULL, 0);
    682     if (size > 0) {
    683         *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size);
    684         if (fwrite(data, 1, size, dstFp) != size) {
    685             LOGD("fwrite %d bytes failed\n", (int) size);
    686             return UNKNOWN_ERROR;
    687         }
    688     }
    689 
    690     return NO_ERROR;
    691 }
    692 
    693 /*
    694  * Copy some of the bytes in "src" to "dst".
    695  *
    696  * If "pCRC32" is NULL, the CRC will not be computed.
    697  *
    698  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
    699  * will be seeked immediately past the data just written.
    700  */
    701 status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length,
    702     unsigned long* pCRC32)
    703 {
    704     unsigned char tmpBuf[32768];
    705     size_t count;
    706 
    707     if (pCRC32 != NULL)
    708         *pCRC32 = crc32(0L, Z_NULL, 0);
    709 
    710     while (length) {
    711         long readSize;
    712 
    713         readSize = sizeof(tmpBuf);
    714         if (readSize > length)
    715             readSize = length;
    716 
    717         count = fread(tmpBuf, 1, readSize, srcFp);
    718         if ((long) count != readSize) {     // error or unexpected EOF
    719             LOGD("fread %d bytes failed\n", (int) readSize);
    720             return UNKNOWN_ERROR;
    721         }
    722 
    723         if (pCRC32 != NULL)
    724             *pCRC32 = crc32(*pCRC32, tmpBuf, count);
    725 
    726         if (fwrite(tmpBuf, 1, count, dstFp) != count) {
    727             LOGD("fwrite %d bytes failed\n", (int) count);
    728             return UNKNOWN_ERROR;
    729         }
    730 
    731         length -= readSize;
    732     }
    733 
    734     return NO_ERROR;
    735 }
    736 
    737 /*
    738  * Compress all of the data in "srcFp" and write it to "dstFp".
    739  *
    740  * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
    741  * will be seeked immediately past the compressed data.
    742  */
    743 status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp,
    744     const void* data, size_t size, unsigned long* pCRC32)
    745 {
    746     status_t result = NO_ERROR;
    747     const size_t kBufSize = 32768;
    748     unsigned char* inBuf = NULL;
    749     unsigned char* outBuf = NULL;
    750     z_stream zstream;
    751     bool atEof = false;     // no feof() aviailable yet
    752     unsigned long crc;
    753     int zerr;
    754 
    755     /*
    756      * Create an input buffer and an output buffer.
    757      */
    758     inBuf = new unsigned char[kBufSize];
    759     outBuf = new unsigned char[kBufSize];
    760     if (inBuf == NULL || outBuf == NULL) {
    761         result = NO_MEMORY;
    762         goto bail;
    763     }
    764 
    765     /*
    766      * Initialize the zlib stream.
    767      */
    768     memset(&zstream, 0, sizeof(zstream));
    769     zstream.zalloc = Z_NULL;
    770     zstream.zfree = Z_NULL;
    771     zstream.opaque = Z_NULL;
    772     zstream.next_in = NULL;
    773     zstream.avail_in = 0;
    774     zstream.next_out = outBuf;
    775     zstream.avail_out = kBufSize;
    776     zstream.data_type = Z_UNKNOWN;
    777 
    778     zerr = deflateInit2(&zstream, Z_BEST_COMPRESSION,
    779         Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
    780     if (zerr != Z_OK) {
    781         result = UNKNOWN_ERROR;
    782         if (zerr == Z_VERSION_ERROR) {
    783             LOGE("Installed zlib is not compatible with linked version (%s)\n",
    784                 ZLIB_VERSION);
    785         } else {
    786             LOGD("Call to deflateInit2 failed (zerr=%d)\n", zerr);
    787         }
    788         goto bail;
    789     }
    790 
    791     crc = crc32(0L, Z_NULL, 0);
    792 
    793     /*
    794      * Loop while we have data.
    795      */
    796     do {
    797         size_t getSize;
    798         int flush;
    799 
    800         /* only read if the input buffer is empty */
    801         if (zstream.avail_in == 0 && !atEof) {
    802             LOGV("+++ reading %d bytes\n", (int)kBufSize);
    803             if (data) {
    804                 getSize = size > kBufSize ? kBufSize : size;
    805                 memcpy(inBuf, data, getSize);
    806                 data = ((const char*)data) + getSize;
    807                 size -= getSize;
    808             } else {
    809                 getSize = fread(inBuf, 1, kBufSize, srcFp);
    810                 if (ferror(srcFp)) {
    811                     LOGD("deflate read failed (errno=%d)\n", errno);
    812                     goto z_bail;
    813                 }
    814             }
    815             if (getSize < kBufSize) {
    816                 LOGV("+++  got %d bytes, EOF reached\n",
    817                     (int)getSize);
    818                 atEof = true;
    819             }
    820 
    821             crc = crc32(crc, inBuf, getSize);
    822 
    823             zstream.next_in = inBuf;
    824             zstream.avail_in = getSize;
    825         }
    826 
    827         if (atEof)
    828             flush = Z_FINISH;       /* tell zlib that we're done */
    829         else
    830             flush = Z_NO_FLUSH;     /* more to come! */
    831 
    832         zerr = deflate(&zstream, flush);
    833         if (zerr != Z_OK && zerr != Z_STREAM_END) {
    834             LOGD("zlib deflate call failed (zerr=%d)\n", zerr);
    835             result = UNKNOWN_ERROR;
    836             goto z_bail;
    837         }
    838 
    839         /* write when we're full or when we're done */
    840         if (zstream.avail_out == 0 ||
    841             (zerr == Z_STREAM_END && zstream.avail_out != (uInt) kBufSize))
    842         {
    843             LOGV("+++ writing %d bytes\n", (int) (zstream.next_out - outBuf));
    844             if (fwrite(outBuf, 1, zstream.next_out - outBuf, dstFp) !=
    845                 (size_t)(zstream.next_out - outBuf))
    846             {
    847                 LOGD("write %d failed in deflate\n",
    848                     (int) (zstream.next_out - outBuf));
    849                 goto z_bail;
    850             }
    851 
    852             zstream.next_out = outBuf;
    853             zstream.avail_out = kBufSize;
    854         }
    855     } while (zerr == Z_OK);
    856 
    857     assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
    858 
    859     *pCRC32 = crc;
    860 
    861 z_bail:
    862     deflateEnd(&zstream);        /* free up any allocated structures */
    863 
    864 bail:
    865     delete[] inBuf;
    866     delete[] outBuf;
    867 
    868     return result;
    869 }
    870 
    871 /*
    872  * Mark an entry as deleted.
    873  *
    874  * We will eventually need to crunch the file down, but if several files
    875  * are being removed (perhaps as part of an "update" process) we can make
    876  * things considerably faster by deferring the removal to "flush" time.
    877  */
    878 status_t ZipFile::remove(ZipEntry* pEntry)
    879 {
    880     /*
    881      * Should verify that pEntry is actually part of this archive, and
    882      * not some stray ZipEntry from a different file.
    883      */
    884 
    885     /* mark entry as deleted, and mark archive as dirty */
    886     pEntry->setDeleted();
    887     mNeedCDRewrite = true;
    888     return NO_ERROR;
    889 }
    890 
    891 /*
    892  * Flush any pending writes.
    893  *
    894  * In particular, this will crunch out deleted entries, and write the
    895  * Central Directory and EOCD if we have stomped on them.
    896  */
    897 status_t ZipFile::flush(void)
    898 {
    899     status_t result = NO_ERROR;
    900     long eocdPosn;
    901     int i, count;
    902 
    903     if (mReadOnly)
    904         return INVALID_OPERATION;
    905     if (!mNeedCDRewrite)
    906         return NO_ERROR;
    907 
    908     assert(mZipFp != NULL);
    909 
    910     result = crunchArchive();
    911     if (result != NO_ERROR)
    912         return result;
    913 
    914     if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0)
    915         return UNKNOWN_ERROR;
    916 
    917     count = mEntries.size();
    918     for (i = 0; i < count; i++) {
    919         ZipEntry* pEntry = mEntries[i];
    920         pEntry->mCDE.write(mZipFp);
    921     }
    922 
    923     eocdPosn = ftell(mZipFp);
    924     mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset;
    925 
    926     mEOCD.write(mZipFp);
    927 
    928     /*
    929      * If we had some stuff bloat up during compression and get replaced
    930      * with plain files, or if we deleted some entries, there's a lot
    931      * of wasted space at the end of the file.  Remove it now.
    932      */
    933     if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) {
    934         LOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno));
    935         // not fatal
    936     }
    937 
    938     /* should we clear the "newly added" flag in all entries now? */
    939 
    940     mNeedCDRewrite = false;
    941     return NO_ERROR;
    942 }
    943 
    944 /*
    945  * Crunch deleted files out of an archive by shifting the later files down.
    946  *
    947  * Because we're not using a temp file, we do the operation inside the
    948  * current file.
    949  */
    950 status_t ZipFile::crunchArchive(void)
    951 {
    952     status_t result = NO_ERROR;
    953     int i, count;
    954     long delCount, adjust;
    955 
    956 #if 0
    957     printf("CONTENTS:\n");
    958     for (i = 0; i < (int) mEntries.size(); i++) {
    959         printf(" %d: lfhOff=%ld del=%d\n",
    960             i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted());
    961     }
    962     printf("  END is %ld\n", (long) mEOCD.mCentralDirOffset);
    963 #endif
    964 
    965     /*
    966      * Roll through the set of files, shifting them as appropriate.  We
    967      * could probably get a slight performance improvement by sliding
    968      * multiple files down at once (because we could use larger reads
    969      * when operating on batches of small files), but it's not that useful.
    970      */
    971     count = mEntries.size();
    972     delCount = adjust = 0;
    973     for (i = 0; i < count; i++) {
    974         ZipEntry* pEntry = mEntries[i];
    975         long span;
    976 
    977         if (pEntry->getLFHOffset() != 0) {
    978             long nextOffset;
    979 
    980             /* Get the length of this entry by finding the offset
    981              * of the next entry.  Directory entries don't have
    982              * file offsets, so we need to find the next non-directory
    983              * entry.
    984              */
    985             nextOffset = 0;
    986             for (int ii = i+1; nextOffset == 0 && ii < count; ii++)
    987                 nextOffset = mEntries[ii]->getLFHOffset();
    988             if (nextOffset == 0)
    989                 nextOffset = mEOCD.mCentralDirOffset;
    990             span = nextOffset - pEntry->getLFHOffset();
    991 
    992             assert(span >= ZipEntry::LocalFileHeader::kLFHLen);
    993         } else {
    994             /* This is a directory entry.  It doesn't have
    995              * any actual file contents, so there's no need to
    996              * move anything.
    997              */
    998             span = 0;
    999         }
   1000 
   1001         //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n",
   1002         //    i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count);
   1003 
   1004         if (pEntry->getDeleted()) {
   1005             adjust += span;
   1006             delCount++;
   1007 
   1008             delete pEntry;
   1009             mEntries.removeAt(i);
   1010 
   1011             /* adjust loop control */
   1012             count--;
   1013             i--;
   1014         } else if (span != 0 && adjust > 0) {
   1015             /* shuffle this entry back */
   1016             //printf("+++ Shuffling '%s' back %ld\n",
   1017             //    pEntry->getFileName(), adjust);
   1018             result = filemove(mZipFp, pEntry->getLFHOffset() - adjust,
   1019                         pEntry->getLFHOffset(), span);
   1020             if (result != NO_ERROR) {
   1021                 /* this is why you use a temp file */
   1022                 LOGE("error during crunch - archive is toast\n");
   1023                 return result;
   1024             }
   1025 
   1026             pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust);
   1027         }
   1028     }
   1029 
   1030     /*
   1031      * Fix EOCD info.  We have to wait until the end to do some of this
   1032      * because we use mCentralDirOffset to determine "span" for the
   1033      * last entry.
   1034      */
   1035     mEOCD.mCentralDirOffset -= adjust;
   1036     mEOCD.mNumEntries -= delCount;
   1037     mEOCD.mTotalNumEntries -= delCount;
   1038     mEOCD.mCentralDirSize = 0;  // mark invalid; set by flush()
   1039 
   1040     assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries);
   1041     assert(mEOCD.mNumEntries == count);
   1042 
   1043     return result;
   1044 }
   1045 
   1046 /*
   1047  * Works like memmove(), but on pieces of a file.
   1048  */
   1049 status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n)
   1050 {
   1051     if (dst == src || n <= 0)
   1052         return NO_ERROR;
   1053 
   1054     unsigned char readBuf[32768];
   1055 
   1056     if (dst < src) {
   1057         /* shift stuff toward start of file; must read from start */
   1058         while (n != 0) {
   1059             size_t getSize = sizeof(readBuf);
   1060             if (getSize > n)
   1061                 getSize = n;
   1062 
   1063             if (fseek(fp, (long) src, SEEK_SET) != 0) {
   1064                 LOGD("filemove src seek %ld failed\n", (long) src);
   1065                 return UNKNOWN_ERROR;
   1066             }
   1067 
   1068             if (fread(readBuf, 1, getSize, fp) != getSize) {
   1069                 LOGD("filemove read %ld off=%ld failed\n",
   1070                     (long) getSize, (long) src);
   1071                 return UNKNOWN_ERROR;
   1072             }
   1073 
   1074             if (fseek(fp, (long) dst, SEEK_SET) != 0) {
   1075                 LOGD("filemove dst seek %ld failed\n", (long) dst);
   1076                 return UNKNOWN_ERROR;
   1077             }
   1078 
   1079             if (fwrite(readBuf, 1, getSize, fp) != getSize) {
   1080                 LOGD("filemove write %ld off=%ld failed\n",
   1081                     (long) getSize, (long) dst);
   1082                 return UNKNOWN_ERROR;
   1083             }
   1084 
   1085             src += getSize;
   1086             dst += getSize;
   1087             n -= getSize;
   1088         }
   1089     } else {
   1090         /* shift stuff toward end of file; must read from end */
   1091         assert(false);      // write this someday, maybe
   1092         return UNKNOWN_ERROR;
   1093     }
   1094 
   1095     return NO_ERROR;
   1096 }
   1097 
   1098 
   1099 /*
   1100  * Get the modification time from a file descriptor.
   1101  */
   1102 time_t ZipFile::getModTime(int fd)
   1103 {
   1104     struct stat sb;
   1105 
   1106     if (fstat(fd, &sb) < 0) {
   1107         LOGD("HEY: fstat on fd %d failed\n", fd);
   1108         return (time_t) -1;
   1109     }
   1110 
   1111     return sb.st_mtime;
   1112 }
   1113 
   1114 
   1115 #if 0       /* this is a bad idea */
   1116 /*
   1117  * Get a copy of the Zip file descriptor.
   1118  *
   1119  * We don't allow this if the file was opened read-write because we tend
   1120  * to leave the file contents in an uncertain state between calls to
   1121  * flush().  The duplicated file descriptor should only be valid for reads.
   1122  */
   1123 int ZipFile::getZipFd(void) const
   1124 {
   1125     if (!mReadOnly)
   1126         return INVALID_OPERATION;
   1127     assert(mZipFp != NULL);
   1128 
   1129     int fd;
   1130     fd = dup(fileno(mZipFp));
   1131     if (fd < 0) {
   1132         LOGD("didn't work, errno=%d\n", errno);
   1133     }
   1134 
   1135     return fd;
   1136 }
   1137 #endif
   1138 
   1139 
   1140 #if 0
   1141 /*
   1142  * Expand data.
   1143  */
   1144 bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const
   1145 {
   1146     return false;
   1147 }
   1148 #endif
   1149 
   1150 // free the memory when you're done
   1151 void* ZipFile::uncompress(const ZipEntry* entry)
   1152 {
   1153     size_t unlen = entry->getUncompressedLen();
   1154     size_t clen = entry->getCompressedLen();
   1155 
   1156     void* buf = malloc(unlen);
   1157     if (buf == NULL) {
   1158         return NULL;
   1159     }
   1160 
   1161     fseek(mZipFp, 0, SEEK_SET);
   1162 
   1163     off_t offset = entry->getFileOffset();
   1164     if (fseek(mZipFp, offset, SEEK_SET) != 0) {
   1165         goto bail;
   1166     }
   1167 
   1168     switch (entry->getCompressionMethod())
   1169     {
   1170         case ZipEntry::kCompressStored: {
   1171             ssize_t amt = fread(buf, 1, unlen, mZipFp);
   1172             if (amt != (ssize_t)unlen) {
   1173                 goto bail;
   1174             }
   1175 #if 0
   1176             printf("data...\n");
   1177             const unsigned char* p = (unsigned char*)buf;
   1178             const unsigned char* end = p+unlen;
   1179             for (int i=0; i<32 && p < end; i++) {
   1180                 printf("0x%08x ", (int)(offset+(i*0x10)));
   1181                 for (int j=0; j<0x10 && p < end; j++) {
   1182                     printf(" %02x", *p);
   1183                     p++;
   1184                 }
   1185                 printf("\n");
   1186             }
   1187 #endif
   1188 
   1189             }
   1190             break;
   1191         case ZipEntry::kCompressDeflated: {
   1192             if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) {
   1193                 goto bail;
   1194             }
   1195             }
   1196             break;
   1197         default:
   1198             goto bail;
   1199     }
   1200     return buf;
   1201 
   1202 bail:
   1203     free(buf);
   1204     return NULL;
   1205 }
   1206 
   1207 
   1208 /*
   1209  * ===========================================================================
   1210  *      ZipFile::EndOfCentralDir
   1211  * ===========================================================================
   1212  */
   1213 
   1214 /*
   1215  * Read the end-of-central-dir fields.
   1216  *
   1217  * "buf" should be positioned at the EOCD signature, and should contain
   1218  * the entire EOCD area including the comment.
   1219  */
   1220 status_t ZipFile::EndOfCentralDir::readBuf(const unsigned char* buf, int len)
   1221 {
   1222     /* don't allow re-use */
   1223     assert(mComment == NULL);
   1224 
   1225     if (len < kEOCDLen) {
   1226         /* looks like ZIP file got truncated */
   1227         LOGD(" Zip EOCD: expected >= %d bytes, found %d\n",
   1228             kEOCDLen, len);
   1229         return INVALID_OPERATION;
   1230     }
   1231 
   1232     /* this should probably be an assert() */
   1233     if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
   1234         return UNKNOWN_ERROR;
   1235 
   1236     mDiskNumber = ZipEntry::getShortLE(&buf[0x04]);
   1237     mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
   1238     mNumEntries = ZipEntry::getShortLE(&buf[0x08]);
   1239     mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
   1240     mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]);
   1241     mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
   1242     mCommentLen = ZipEntry::getShortLE(&buf[0x14]);
   1243 
   1244     // TODO: validate mCentralDirOffset
   1245 
   1246     if (mCommentLen > 0) {
   1247         if (kEOCDLen + mCommentLen > len) {
   1248             LOGD("EOCD(%d) + comment(%d) exceeds len (%d)\n",
   1249                 kEOCDLen, mCommentLen, len);
   1250             return UNKNOWN_ERROR;
   1251         }
   1252         mComment = new unsigned char[mCommentLen];
   1253         memcpy(mComment, buf + kEOCDLen, mCommentLen);
   1254     }
   1255 
   1256     return NO_ERROR;
   1257 }
   1258 
   1259 /*
   1260  * Write an end-of-central-directory section.
   1261  */
   1262 status_t ZipFile::EndOfCentralDir::write(FILE* fp)
   1263 {
   1264     unsigned char buf[kEOCDLen];
   1265 
   1266     ZipEntry::putLongLE(&buf[0x00], kSignature);
   1267     ZipEntry::putShortLE(&buf[0x04], mDiskNumber);
   1268     ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir);
   1269     ZipEntry::putShortLE(&buf[0x08], mNumEntries);
   1270     ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries);
   1271     ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize);
   1272     ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset);
   1273     ZipEntry::putShortLE(&buf[0x14], mCommentLen);
   1274 
   1275     if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen)
   1276         return UNKNOWN_ERROR;
   1277     if (mCommentLen > 0) {
   1278         assert(mComment != NULL);
   1279         if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen)
   1280             return UNKNOWN_ERROR;
   1281     }
   1282 
   1283     return NO_ERROR;
   1284 }
   1285 
   1286 /*
   1287  * Dump the contents of an EndOfCentralDir object.
   1288  */
   1289 void ZipFile::EndOfCentralDir::dump(void) const
   1290 {
   1291     LOGD(" EndOfCentralDir contents:\n");
   1292     LOGD("  diskNum=%u diskWCD=%u numEnt=%u totalNumEnt=%u\n",
   1293         mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries);
   1294     LOGD("  centDirSize=%lu centDirOff=%lu commentLen=%u\n",
   1295         mCentralDirSize, mCentralDirOffset, mCommentLen);
   1296 }
   1297 
   1298