1 /* 2 * Copyright (C) 2006 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // 18 // Access to Zip archives. 19 // 20 21 #define LOG_TAG "zip" 22 23 #include <utils/ZipUtils.h> 24 #include <utils/Log.h> 25 26 #include "ZipFile.h" 27 28 #include <zlib.h> 29 #define DEF_MEM_LEVEL 8 // normally in zutil.h? 30 31 #include <memory.h> 32 #include <sys/stat.h> 33 #include <errno.h> 34 #include <assert.h> 35 36 using namespace android; 37 38 /* 39 * Some environments require the "b", some choke on it. 40 */ 41 #define FILE_OPEN_RO "rb" 42 #define FILE_OPEN_RW "r+b" 43 #define FILE_OPEN_RW_CREATE "w+b" 44 45 /* should live somewhere else? */ 46 static status_t errnoToStatus(int err) 47 { 48 if (err == ENOENT) 49 return NAME_NOT_FOUND; 50 else if (err == EACCES) 51 return PERMISSION_DENIED; 52 else 53 return UNKNOWN_ERROR; 54 } 55 56 /* 57 * Open a file and parse its guts. 58 */ 59 status_t ZipFile::open(const char* zipFileName, int flags) 60 { 61 bool newArchive = false; 62 63 assert(mZipFp == NULL); // no reopen 64 65 if ((flags & kOpenTruncate)) 66 flags |= kOpenCreate; // trunc implies create 67 68 if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite)) 69 return INVALID_OPERATION; // not both 70 if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite))) 71 return INVALID_OPERATION; // not neither 72 if ((flags & kOpenCreate) && !(flags & kOpenReadWrite)) 73 return INVALID_OPERATION; // create requires write 74 75 if (flags & kOpenTruncate) { 76 newArchive = true; 77 } else { 78 newArchive = (access(zipFileName, F_OK) != 0); 79 if (!(flags & kOpenCreate) && newArchive) { 80 /* not creating, must already exist */ 81 LOGD("File %s does not exist", zipFileName); 82 return NAME_NOT_FOUND; 83 } 84 } 85 86 /* open the file */ 87 const char* openflags; 88 if (flags & kOpenReadWrite) { 89 if (newArchive) 90 openflags = FILE_OPEN_RW_CREATE; 91 else 92 openflags = FILE_OPEN_RW; 93 } else { 94 openflags = FILE_OPEN_RO; 95 } 96 mZipFp = fopen(zipFileName, openflags); 97 if (mZipFp == NULL) { 98 int err = errno; 99 LOGD("fopen failed: %d\n", err); 100 return errnoToStatus(err); 101 } 102 103 status_t result; 104 if (!newArchive) { 105 /* 106 * Load the central directory. If that fails, then this probably 107 * isn't a Zip archive. 108 */ 109 result = readCentralDir(); 110 } else { 111 /* 112 * Newly-created. The EndOfCentralDir constructor actually 113 * sets everything to be the way we want it (all zeroes). We 114 * set mNeedCDRewrite so that we create *something* if the 115 * caller doesn't add any files. (We could also just unlink 116 * the file if it's brand new and nothing was added, but that's 117 * probably doing more than we really should -- the user might 118 * have a need for empty zip files.) 119 */ 120 mNeedCDRewrite = true; 121 result = NO_ERROR; 122 } 123 124 if (flags & kOpenReadOnly) 125 mReadOnly = true; 126 else 127 assert(!mReadOnly); 128 129 return result; 130 } 131 132 /* 133 * Return the Nth entry in the archive. 134 */ 135 ZipEntry* ZipFile::getEntryByIndex(int idx) const 136 { 137 if (idx < 0 || idx >= (int) mEntries.size()) 138 return NULL; 139 140 return mEntries[idx]; 141 } 142 143 /* 144 * Find an entry by name. 145 */ 146 ZipEntry* ZipFile::getEntryByName(const char* fileName) const 147 { 148 /* 149 * Do a stupid linear string-compare search. 150 * 151 * There are various ways to speed this up, especially since it's rare 152 * to intermingle changes to the archive with "get by name" calls. We 153 * don't want to sort the mEntries vector itself, however, because 154 * it's used to recreate the Central Directory. 155 * 156 * (Hash table works, parallel list of pointers in sorted order is good.) 157 */ 158 int idx; 159 160 for (idx = mEntries.size()-1; idx >= 0; idx--) { 161 ZipEntry* pEntry = mEntries[idx]; 162 if (!pEntry->getDeleted() && 163 strcmp(fileName, pEntry->getFileName()) == 0) 164 { 165 return pEntry; 166 } 167 } 168 169 return NULL; 170 } 171 172 /* 173 * Empty the mEntries vector. 174 */ 175 void ZipFile::discardEntries(void) 176 { 177 int count = mEntries.size(); 178 179 while (--count >= 0) 180 delete mEntries[count]; 181 182 mEntries.clear(); 183 } 184 185 186 /* 187 * Find the central directory and read the contents. 188 * 189 * The fun thing about ZIP archives is that they may or may not be 190 * readable from start to end. In some cases, notably for archives 191 * that were written to stdout, the only length information is in the 192 * central directory at the end of the file. 193 * 194 * Of course, the central directory can be followed by a variable-length 195 * comment field, so we have to scan through it backwards. The comment 196 * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff 197 * itself, plus apparently sometimes people throw random junk on the end 198 * just for the fun of it. 199 * 200 * This is all a little wobbly. If the wrong value ends up in the EOCD 201 * area, we're hosed. This appears to be the way that everbody handles 202 * it though, so we're in pretty good company if this fails. 203 */ 204 status_t ZipFile::readCentralDir(void) 205 { 206 status_t result = NO_ERROR; 207 unsigned char* buf = NULL; 208 off_t fileLength, seekStart; 209 long readAmount; 210 int i; 211 212 fseek(mZipFp, 0, SEEK_END); 213 fileLength = ftell(mZipFp); 214 rewind(mZipFp); 215 216 /* too small to be a ZIP archive? */ 217 if (fileLength < EndOfCentralDir::kEOCDLen) { 218 LOGD("Length is %ld -- too small\n", (long)fileLength); 219 result = INVALID_OPERATION; 220 goto bail; 221 } 222 223 buf = new unsigned char[EndOfCentralDir::kMaxEOCDSearch]; 224 if (buf == NULL) { 225 LOGD("Failure allocating %d bytes for EOCD search", 226 EndOfCentralDir::kMaxEOCDSearch); 227 result = NO_MEMORY; 228 goto bail; 229 } 230 231 if (fileLength > EndOfCentralDir::kMaxEOCDSearch) { 232 seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch; 233 readAmount = EndOfCentralDir::kMaxEOCDSearch; 234 } else { 235 seekStart = 0; 236 readAmount = (long) fileLength; 237 } 238 if (fseek(mZipFp, seekStart, SEEK_SET) != 0) { 239 LOGD("Failure seeking to end of zip at %ld", (long) seekStart); 240 result = UNKNOWN_ERROR; 241 goto bail; 242 } 243 244 /* read the last part of the file into the buffer */ 245 if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) { 246 LOGD("short file? wanted %ld\n", readAmount); 247 result = UNKNOWN_ERROR; 248 goto bail; 249 } 250 251 /* find the end-of-central-dir magic */ 252 for (i = readAmount - 4; i >= 0; i--) { 253 if (buf[i] == 0x50 && 254 ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature) 255 { 256 LOGV("+++ Found EOCD at buf+%d\n", i); 257 break; 258 } 259 } 260 if (i < 0) { 261 LOGD("EOCD not found, not Zip\n"); 262 result = INVALID_OPERATION; 263 goto bail; 264 } 265 266 /* extract eocd values */ 267 result = mEOCD.readBuf(buf + i, readAmount - i); 268 if (result != NO_ERROR) { 269 LOGD("Failure reading %ld bytes of EOCD values", readAmount - i); 270 goto bail; 271 } 272 //mEOCD.dump(); 273 274 if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 || 275 mEOCD.mNumEntries != mEOCD.mTotalNumEntries) 276 { 277 LOGD("Archive spanning not supported\n"); 278 result = INVALID_OPERATION; 279 goto bail; 280 } 281 282 /* 283 * So far so good. "mCentralDirSize" is the size in bytes of the 284 * central directory, so we can just seek back that far to find it. 285 * We can also seek forward mCentralDirOffset bytes from the 286 * start of the file. 287 * 288 * We're not guaranteed to have the rest of the central dir in the 289 * buffer, nor are we guaranteed that the central dir will have any 290 * sort of convenient size. We need to skip to the start of it and 291 * read the header, then the other goodies. 292 * 293 * The only thing we really need right now is the file comment, which 294 * we're hoping to preserve. 295 */ 296 if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) { 297 LOGD("Failure seeking to central dir offset %ld\n", 298 mEOCD.mCentralDirOffset); 299 result = UNKNOWN_ERROR; 300 goto bail; 301 } 302 303 /* 304 * Loop through and read the central dir entries. 305 */ 306 LOGV("Scanning %d entries...\n", mEOCD.mTotalNumEntries); 307 int entry; 308 for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) { 309 ZipEntry* pEntry = new ZipEntry; 310 311 result = pEntry->initFromCDE(mZipFp); 312 if (result != NO_ERROR) { 313 LOGD("initFromCDE failed\n"); 314 delete pEntry; 315 goto bail; 316 } 317 318 mEntries.add(pEntry); 319 } 320 321 322 /* 323 * If all went well, we should now be back at the EOCD. 324 */ 325 { 326 unsigned char checkBuf[4]; 327 if (fread(checkBuf, 1, 4, mZipFp) != 4) { 328 LOGD("EOCD check read failed\n"); 329 result = INVALID_OPERATION; 330 goto bail; 331 } 332 if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) { 333 LOGD("EOCD read check failed\n"); 334 result = UNKNOWN_ERROR; 335 goto bail; 336 } 337 LOGV("+++ EOCD read check passed\n"); 338 } 339 340 bail: 341 delete[] buf; 342 return result; 343 } 344 345 346 /* 347 * Add a new file to the archive. 348 * 349 * This requires creating and populating a ZipEntry structure, and copying 350 * the data into the file at the appropriate position. The "appropriate 351 * position" is the current location of the central directory, which we 352 * casually overwrite (we can put it back later). 353 * 354 * If we were concerned about safety, we would want to make all changes 355 * in a temp file and then overwrite the original after everything was 356 * safely written. Not really a concern for us. 357 */ 358 status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size, 359 const char* storageName, int sourceType, int compressionMethod, 360 ZipEntry** ppEntry) 361 { 362 ZipEntry* pEntry = NULL; 363 status_t result = NO_ERROR; 364 long lfhPosn, startPosn, endPosn, uncompressedLen; 365 FILE* inputFp = NULL; 366 unsigned long crc; 367 time_t modWhen; 368 369 if (mReadOnly) 370 return INVALID_OPERATION; 371 372 assert(compressionMethod == ZipEntry::kCompressDeflated || 373 compressionMethod == ZipEntry::kCompressStored); 374 375 /* make sure we're in a reasonable state */ 376 assert(mZipFp != NULL); 377 assert(mEntries.size() == mEOCD.mTotalNumEntries); 378 379 /* make sure it doesn't already exist */ 380 if (getEntryByName(storageName) != NULL) 381 return ALREADY_EXISTS; 382 383 if (!data) { 384 inputFp = fopen(fileName, FILE_OPEN_RO); 385 if (inputFp == NULL) 386 return errnoToStatus(errno); 387 } 388 389 if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) { 390 result = UNKNOWN_ERROR; 391 goto bail; 392 } 393 394 pEntry = new ZipEntry; 395 pEntry->initNew(storageName, NULL); 396 397 /* 398 * From here on out, failures are more interesting. 399 */ 400 mNeedCDRewrite = true; 401 402 /* 403 * Write the LFH, even though it's still mostly blank. We need it 404 * as a place-holder. In theory the LFH isn't necessary, but in 405 * practice some utilities demand it. 406 */ 407 lfhPosn = ftell(mZipFp); 408 pEntry->mLFH.write(mZipFp); 409 startPosn = ftell(mZipFp); 410 411 /* 412 * Copy the data in, possibly compressing it as we go. 413 */ 414 if (sourceType == ZipEntry::kCompressStored) { 415 if (compressionMethod == ZipEntry::kCompressDeflated) { 416 bool failed = false; 417 result = compressFpToFp(mZipFp, inputFp, data, size, &crc); 418 if (result != NO_ERROR) { 419 LOGD("compression failed, storing\n"); 420 failed = true; 421 } else { 422 /* 423 * Make sure it has compressed "enough". This probably ought 424 * to be set through an API call, but I don't expect our 425 * criteria to change over time. 426 */ 427 long src = inputFp ? ftell(inputFp) : size; 428 long dst = ftell(mZipFp) - startPosn; 429 if (dst + (dst / 10) > src) { 430 LOGD("insufficient compression (src=%ld dst=%ld), storing\n", 431 src, dst); 432 failed = true; 433 } 434 } 435 436 if (failed) { 437 compressionMethod = ZipEntry::kCompressStored; 438 if (inputFp) rewind(inputFp); 439 fseek(mZipFp, startPosn, SEEK_SET); 440 /* fall through to kCompressStored case */ 441 } 442 } 443 /* handle "no compression" request, or failed compression from above */ 444 if (compressionMethod == ZipEntry::kCompressStored) { 445 if (inputFp) { 446 result = copyFpToFp(mZipFp, inputFp, &crc); 447 } else { 448 result = copyDataToFp(mZipFp, data, size, &crc); 449 } 450 if (result != NO_ERROR) { 451 // don't need to truncate; happens in CDE rewrite 452 LOGD("failed copying data in\n"); 453 goto bail; 454 } 455 } 456 457 // currently seeked to end of file 458 uncompressedLen = inputFp ? ftell(inputFp) : size; 459 } else if (sourceType == ZipEntry::kCompressDeflated) { 460 /* we should support uncompressed-from-compressed, but it's not 461 * important right now */ 462 assert(compressionMethod == ZipEntry::kCompressDeflated); 463 464 bool scanResult; 465 int method; 466 long compressedLen; 467 468 scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen, 469 &compressedLen, &crc); 470 if (!scanResult || method != ZipEntry::kCompressDeflated) { 471 LOGD("this isn't a deflated gzip file?"); 472 result = UNKNOWN_ERROR; 473 goto bail; 474 } 475 476 result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL); 477 if (result != NO_ERROR) { 478 LOGD("failed copying gzip data in\n"); 479 goto bail; 480 } 481 } else { 482 assert(false); 483 result = UNKNOWN_ERROR; 484 goto bail; 485 } 486 487 /* 488 * We could write the "Data Descriptor", but there doesn't seem to 489 * be any point since we're going to go back and write the LFH. 490 * 491 * Update file offsets. 492 */ 493 endPosn = ftell(mZipFp); // seeked to end of compressed data 494 495 /* 496 * Success! Fill out new values. 497 */ 498 pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc, 499 compressionMethod); 500 modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp)); 501 pEntry->setModWhen(modWhen); 502 pEntry->setLFHOffset(lfhPosn); 503 mEOCD.mNumEntries++; 504 mEOCD.mTotalNumEntries++; 505 mEOCD.mCentralDirSize = 0; // mark invalid; set by flush() 506 mEOCD.mCentralDirOffset = endPosn; 507 508 /* 509 * Go back and write the LFH. 510 */ 511 if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) { 512 result = UNKNOWN_ERROR; 513 goto bail; 514 } 515 pEntry->mLFH.write(mZipFp); 516 517 /* 518 * Add pEntry to the list. 519 */ 520 mEntries.add(pEntry); 521 if (ppEntry != NULL) 522 *ppEntry = pEntry; 523 pEntry = NULL; 524 525 bail: 526 if (inputFp != NULL) 527 fclose(inputFp); 528 delete pEntry; 529 return result; 530 } 531 532 /* 533 * Add an entry by copying it from another zip file. If "padding" is 534 * nonzero, the specified number of bytes will be added to the "extra" 535 * field in the header. 536 * 537 * If "ppEntry" is non-NULL, a pointer to the new entry will be returned. 538 */ 539 status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry, 540 int padding, ZipEntry** ppEntry) 541 { 542 ZipEntry* pEntry = NULL; 543 status_t result; 544 long lfhPosn, endPosn; 545 546 if (mReadOnly) 547 return INVALID_OPERATION; 548 549 /* make sure we're in a reasonable state */ 550 assert(mZipFp != NULL); 551 assert(mEntries.size() == mEOCD.mTotalNumEntries); 552 553 if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) { 554 result = UNKNOWN_ERROR; 555 goto bail; 556 } 557 558 pEntry = new ZipEntry; 559 if (pEntry == NULL) { 560 result = NO_MEMORY; 561 goto bail; 562 } 563 564 result = pEntry->initFromExternal(pSourceZip, pSourceEntry); 565 if (result != NO_ERROR) 566 goto bail; 567 if (padding != 0) { 568 result = pEntry->addPadding(padding); 569 if (result != NO_ERROR) 570 goto bail; 571 } 572 573 /* 574 * From here on out, failures are more interesting. 575 */ 576 mNeedCDRewrite = true; 577 578 /* 579 * Write the LFH. Since we're not recompressing the data, we already 580 * have all of the fields filled out. 581 */ 582 lfhPosn = ftell(mZipFp); 583 pEntry->mLFH.write(mZipFp); 584 585 /* 586 * Copy the data over. 587 * 588 * If the "has data descriptor" flag is set, we want to copy the DD 589 * fields as well. This is a fixed-size area immediately following 590 * the data. 591 */ 592 if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0) 593 { 594 result = UNKNOWN_ERROR; 595 goto bail; 596 } 597 598 off_t copyLen; 599 copyLen = pSourceEntry->getCompressedLen(); 600 if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0) 601 copyLen += ZipEntry::kDataDescriptorLen; 602 603 if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL) 604 != NO_ERROR) 605 { 606 LOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName); 607 result = UNKNOWN_ERROR; 608 goto bail; 609 } 610 611 /* 612 * Update file offsets. 613 */ 614 endPosn = ftell(mZipFp); 615 616 /* 617 * Success! Fill out new values. 618 */ 619 pEntry->setLFHOffset(lfhPosn); // sets mCDE.mLocalHeaderRelOffset 620 mEOCD.mNumEntries++; 621 mEOCD.mTotalNumEntries++; 622 mEOCD.mCentralDirSize = 0; // mark invalid; set by flush() 623 mEOCD.mCentralDirOffset = endPosn; 624 625 /* 626 * Add pEntry to the list. 627 */ 628 mEntries.add(pEntry); 629 if (ppEntry != NULL) 630 *ppEntry = pEntry; 631 pEntry = NULL; 632 633 result = NO_ERROR; 634 635 bail: 636 delete pEntry; 637 return result; 638 } 639 640 /* 641 * Copy all of the bytes in "src" to "dst". 642 * 643 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp" 644 * will be seeked immediately past the data. 645 */ 646 status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, unsigned long* pCRC32) 647 { 648 unsigned char tmpBuf[32768]; 649 size_t count; 650 651 *pCRC32 = crc32(0L, Z_NULL, 0); 652 653 while (1) { 654 count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp); 655 if (ferror(srcFp) || ferror(dstFp)) 656 return errnoToStatus(errno); 657 if (count == 0) 658 break; 659 660 *pCRC32 = crc32(*pCRC32, tmpBuf, count); 661 662 if (fwrite(tmpBuf, 1, count, dstFp) != count) { 663 LOGD("fwrite %d bytes failed\n", (int) count); 664 return UNKNOWN_ERROR; 665 } 666 } 667 668 return NO_ERROR; 669 } 670 671 /* 672 * Copy all of the bytes in "src" to "dst". 673 * 674 * On exit, "dstFp" will be seeked immediately past the data. 675 */ 676 status_t ZipFile::copyDataToFp(FILE* dstFp, 677 const void* data, size_t size, unsigned long* pCRC32) 678 { 679 size_t count; 680 681 *pCRC32 = crc32(0L, Z_NULL, 0); 682 if (size > 0) { 683 *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size); 684 if (fwrite(data, 1, size, dstFp) != size) { 685 LOGD("fwrite %d bytes failed\n", (int) size); 686 return UNKNOWN_ERROR; 687 } 688 } 689 690 return NO_ERROR; 691 } 692 693 /* 694 * Copy some of the bytes in "src" to "dst". 695 * 696 * If "pCRC32" is NULL, the CRC will not be computed. 697 * 698 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp" 699 * will be seeked immediately past the data just written. 700 */ 701 status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length, 702 unsigned long* pCRC32) 703 { 704 unsigned char tmpBuf[32768]; 705 size_t count; 706 707 if (pCRC32 != NULL) 708 *pCRC32 = crc32(0L, Z_NULL, 0); 709 710 while (length) { 711 long readSize; 712 713 readSize = sizeof(tmpBuf); 714 if (readSize > length) 715 readSize = length; 716 717 count = fread(tmpBuf, 1, readSize, srcFp); 718 if ((long) count != readSize) { // error or unexpected EOF 719 LOGD("fread %d bytes failed\n", (int) readSize); 720 return UNKNOWN_ERROR; 721 } 722 723 if (pCRC32 != NULL) 724 *pCRC32 = crc32(*pCRC32, tmpBuf, count); 725 726 if (fwrite(tmpBuf, 1, count, dstFp) != count) { 727 LOGD("fwrite %d bytes failed\n", (int) count); 728 return UNKNOWN_ERROR; 729 } 730 731 length -= readSize; 732 } 733 734 return NO_ERROR; 735 } 736 737 /* 738 * Compress all of the data in "srcFp" and write it to "dstFp". 739 * 740 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp" 741 * will be seeked immediately past the compressed data. 742 */ 743 status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp, 744 const void* data, size_t size, unsigned long* pCRC32) 745 { 746 status_t result = NO_ERROR; 747 const size_t kBufSize = 32768; 748 unsigned char* inBuf = NULL; 749 unsigned char* outBuf = NULL; 750 z_stream zstream; 751 bool atEof = false; // no feof() aviailable yet 752 unsigned long crc; 753 int zerr; 754 755 /* 756 * Create an input buffer and an output buffer. 757 */ 758 inBuf = new unsigned char[kBufSize]; 759 outBuf = new unsigned char[kBufSize]; 760 if (inBuf == NULL || outBuf == NULL) { 761 result = NO_MEMORY; 762 goto bail; 763 } 764 765 /* 766 * Initialize the zlib stream. 767 */ 768 memset(&zstream, 0, sizeof(zstream)); 769 zstream.zalloc = Z_NULL; 770 zstream.zfree = Z_NULL; 771 zstream.opaque = Z_NULL; 772 zstream.next_in = NULL; 773 zstream.avail_in = 0; 774 zstream.next_out = outBuf; 775 zstream.avail_out = kBufSize; 776 zstream.data_type = Z_UNKNOWN; 777 778 zerr = deflateInit2(&zstream, Z_BEST_COMPRESSION, 779 Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); 780 if (zerr != Z_OK) { 781 result = UNKNOWN_ERROR; 782 if (zerr == Z_VERSION_ERROR) { 783 LOGE("Installed zlib is not compatible with linked version (%s)\n", 784 ZLIB_VERSION); 785 } else { 786 LOGD("Call to deflateInit2 failed (zerr=%d)\n", zerr); 787 } 788 goto bail; 789 } 790 791 crc = crc32(0L, Z_NULL, 0); 792 793 /* 794 * Loop while we have data. 795 */ 796 do { 797 size_t getSize; 798 int flush; 799 800 /* only read if the input buffer is empty */ 801 if (zstream.avail_in == 0 && !atEof) { 802 LOGV("+++ reading %d bytes\n", (int)kBufSize); 803 if (data) { 804 getSize = size > kBufSize ? kBufSize : size; 805 memcpy(inBuf, data, getSize); 806 data = ((const char*)data) + getSize; 807 size -= getSize; 808 } else { 809 getSize = fread(inBuf, 1, kBufSize, srcFp); 810 if (ferror(srcFp)) { 811 LOGD("deflate read failed (errno=%d)\n", errno); 812 goto z_bail; 813 } 814 } 815 if (getSize < kBufSize) { 816 LOGV("+++ got %d bytes, EOF reached\n", 817 (int)getSize); 818 atEof = true; 819 } 820 821 crc = crc32(crc, inBuf, getSize); 822 823 zstream.next_in = inBuf; 824 zstream.avail_in = getSize; 825 } 826 827 if (atEof) 828 flush = Z_FINISH; /* tell zlib that we're done */ 829 else 830 flush = Z_NO_FLUSH; /* more to come! */ 831 832 zerr = deflate(&zstream, flush); 833 if (zerr != Z_OK && zerr != Z_STREAM_END) { 834 LOGD("zlib deflate call failed (zerr=%d)\n", zerr); 835 result = UNKNOWN_ERROR; 836 goto z_bail; 837 } 838 839 /* write when we're full or when we're done */ 840 if (zstream.avail_out == 0 || 841 (zerr == Z_STREAM_END && zstream.avail_out != (uInt) kBufSize)) 842 { 843 LOGV("+++ writing %d bytes\n", (int) (zstream.next_out - outBuf)); 844 if (fwrite(outBuf, 1, zstream.next_out - outBuf, dstFp) != 845 (size_t)(zstream.next_out - outBuf)) 846 { 847 LOGD("write %d failed in deflate\n", 848 (int) (zstream.next_out - outBuf)); 849 goto z_bail; 850 } 851 852 zstream.next_out = outBuf; 853 zstream.avail_out = kBufSize; 854 } 855 } while (zerr == Z_OK); 856 857 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 858 859 *pCRC32 = crc; 860 861 z_bail: 862 deflateEnd(&zstream); /* free up any allocated structures */ 863 864 bail: 865 delete[] inBuf; 866 delete[] outBuf; 867 868 return result; 869 } 870 871 /* 872 * Mark an entry as deleted. 873 * 874 * We will eventually need to crunch the file down, but if several files 875 * are being removed (perhaps as part of an "update" process) we can make 876 * things considerably faster by deferring the removal to "flush" time. 877 */ 878 status_t ZipFile::remove(ZipEntry* pEntry) 879 { 880 /* 881 * Should verify that pEntry is actually part of this archive, and 882 * not some stray ZipEntry from a different file. 883 */ 884 885 /* mark entry as deleted, and mark archive as dirty */ 886 pEntry->setDeleted(); 887 mNeedCDRewrite = true; 888 return NO_ERROR; 889 } 890 891 /* 892 * Flush any pending writes. 893 * 894 * In particular, this will crunch out deleted entries, and write the 895 * Central Directory and EOCD if we have stomped on them. 896 */ 897 status_t ZipFile::flush(void) 898 { 899 status_t result = NO_ERROR; 900 long eocdPosn; 901 int i, count; 902 903 if (mReadOnly) 904 return INVALID_OPERATION; 905 if (!mNeedCDRewrite) 906 return NO_ERROR; 907 908 assert(mZipFp != NULL); 909 910 result = crunchArchive(); 911 if (result != NO_ERROR) 912 return result; 913 914 if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) 915 return UNKNOWN_ERROR; 916 917 count = mEntries.size(); 918 for (i = 0; i < count; i++) { 919 ZipEntry* pEntry = mEntries[i]; 920 pEntry->mCDE.write(mZipFp); 921 } 922 923 eocdPosn = ftell(mZipFp); 924 mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset; 925 926 mEOCD.write(mZipFp); 927 928 /* 929 * If we had some stuff bloat up during compression and get replaced 930 * with plain files, or if we deleted some entries, there's a lot 931 * of wasted space at the end of the file. Remove it now. 932 */ 933 if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) { 934 LOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno)); 935 // not fatal 936 } 937 938 /* should we clear the "newly added" flag in all entries now? */ 939 940 mNeedCDRewrite = false; 941 return NO_ERROR; 942 } 943 944 /* 945 * Crunch deleted files out of an archive by shifting the later files down. 946 * 947 * Because we're not using a temp file, we do the operation inside the 948 * current file. 949 */ 950 status_t ZipFile::crunchArchive(void) 951 { 952 status_t result = NO_ERROR; 953 int i, count; 954 long delCount, adjust; 955 956 #if 0 957 printf("CONTENTS:\n"); 958 for (i = 0; i < (int) mEntries.size(); i++) { 959 printf(" %d: lfhOff=%ld del=%d\n", 960 i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted()); 961 } 962 printf(" END is %ld\n", (long) mEOCD.mCentralDirOffset); 963 #endif 964 965 /* 966 * Roll through the set of files, shifting them as appropriate. We 967 * could probably get a slight performance improvement by sliding 968 * multiple files down at once (because we could use larger reads 969 * when operating on batches of small files), but it's not that useful. 970 */ 971 count = mEntries.size(); 972 delCount = adjust = 0; 973 for (i = 0; i < count; i++) { 974 ZipEntry* pEntry = mEntries[i]; 975 long span; 976 977 if (pEntry->getLFHOffset() != 0) { 978 long nextOffset; 979 980 /* Get the length of this entry by finding the offset 981 * of the next entry. Directory entries don't have 982 * file offsets, so we need to find the next non-directory 983 * entry. 984 */ 985 nextOffset = 0; 986 for (int ii = i+1; nextOffset == 0 && ii < count; ii++) 987 nextOffset = mEntries[ii]->getLFHOffset(); 988 if (nextOffset == 0) 989 nextOffset = mEOCD.mCentralDirOffset; 990 span = nextOffset - pEntry->getLFHOffset(); 991 992 assert(span >= ZipEntry::LocalFileHeader::kLFHLen); 993 } else { 994 /* This is a directory entry. It doesn't have 995 * any actual file contents, so there's no need to 996 * move anything. 997 */ 998 span = 0; 999 } 1000 1001 //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n", 1002 // i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count); 1003 1004 if (pEntry->getDeleted()) { 1005 adjust += span; 1006 delCount++; 1007 1008 delete pEntry; 1009 mEntries.removeAt(i); 1010 1011 /* adjust loop control */ 1012 count--; 1013 i--; 1014 } else if (span != 0 && adjust > 0) { 1015 /* shuffle this entry back */ 1016 //printf("+++ Shuffling '%s' back %ld\n", 1017 // pEntry->getFileName(), adjust); 1018 result = filemove(mZipFp, pEntry->getLFHOffset() - adjust, 1019 pEntry->getLFHOffset(), span); 1020 if (result != NO_ERROR) { 1021 /* this is why you use a temp file */ 1022 LOGE("error during crunch - archive is toast\n"); 1023 return result; 1024 } 1025 1026 pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust); 1027 } 1028 } 1029 1030 /* 1031 * Fix EOCD info. We have to wait until the end to do some of this 1032 * because we use mCentralDirOffset to determine "span" for the 1033 * last entry. 1034 */ 1035 mEOCD.mCentralDirOffset -= adjust; 1036 mEOCD.mNumEntries -= delCount; 1037 mEOCD.mTotalNumEntries -= delCount; 1038 mEOCD.mCentralDirSize = 0; // mark invalid; set by flush() 1039 1040 assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries); 1041 assert(mEOCD.mNumEntries == count); 1042 1043 return result; 1044 } 1045 1046 /* 1047 * Works like memmove(), but on pieces of a file. 1048 */ 1049 status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n) 1050 { 1051 if (dst == src || n <= 0) 1052 return NO_ERROR; 1053 1054 unsigned char readBuf[32768]; 1055 1056 if (dst < src) { 1057 /* shift stuff toward start of file; must read from start */ 1058 while (n != 0) { 1059 size_t getSize = sizeof(readBuf); 1060 if (getSize > n) 1061 getSize = n; 1062 1063 if (fseek(fp, (long) src, SEEK_SET) != 0) { 1064 LOGD("filemove src seek %ld failed\n", (long) src); 1065 return UNKNOWN_ERROR; 1066 } 1067 1068 if (fread(readBuf, 1, getSize, fp) != getSize) { 1069 LOGD("filemove read %ld off=%ld failed\n", 1070 (long) getSize, (long) src); 1071 return UNKNOWN_ERROR; 1072 } 1073 1074 if (fseek(fp, (long) dst, SEEK_SET) != 0) { 1075 LOGD("filemove dst seek %ld failed\n", (long) dst); 1076 return UNKNOWN_ERROR; 1077 } 1078 1079 if (fwrite(readBuf, 1, getSize, fp) != getSize) { 1080 LOGD("filemove write %ld off=%ld failed\n", 1081 (long) getSize, (long) dst); 1082 return UNKNOWN_ERROR; 1083 } 1084 1085 src += getSize; 1086 dst += getSize; 1087 n -= getSize; 1088 } 1089 } else { 1090 /* shift stuff toward end of file; must read from end */ 1091 assert(false); // write this someday, maybe 1092 return UNKNOWN_ERROR; 1093 } 1094 1095 return NO_ERROR; 1096 } 1097 1098 1099 /* 1100 * Get the modification time from a file descriptor. 1101 */ 1102 time_t ZipFile::getModTime(int fd) 1103 { 1104 struct stat sb; 1105 1106 if (fstat(fd, &sb) < 0) { 1107 LOGD("HEY: fstat on fd %d failed\n", fd); 1108 return (time_t) -1; 1109 } 1110 1111 return sb.st_mtime; 1112 } 1113 1114 1115 #if 0 /* this is a bad idea */ 1116 /* 1117 * Get a copy of the Zip file descriptor. 1118 * 1119 * We don't allow this if the file was opened read-write because we tend 1120 * to leave the file contents in an uncertain state between calls to 1121 * flush(). The duplicated file descriptor should only be valid for reads. 1122 */ 1123 int ZipFile::getZipFd(void) const 1124 { 1125 if (!mReadOnly) 1126 return INVALID_OPERATION; 1127 assert(mZipFp != NULL); 1128 1129 int fd; 1130 fd = dup(fileno(mZipFp)); 1131 if (fd < 0) { 1132 LOGD("didn't work, errno=%d\n", errno); 1133 } 1134 1135 return fd; 1136 } 1137 #endif 1138 1139 1140 #if 0 1141 /* 1142 * Expand data. 1143 */ 1144 bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const 1145 { 1146 return false; 1147 } 1148 #endif 1149 1150 // free the memory when you're done 1151 void* ZipFile::uncompress(const ZipEntry* entry) 1152 { 1153 size_t unlen = entry->getUncompressedLen(); 1154 size_t clen = entry->getCompressedLen(); 1155 1156 void* buf = malloc(unlen); 1157 if (buf == NULL) { 1158 return NULL; 1159 } 1160 1161 fseek(mZipFp, 0, SEEK_SET); 1162 1163 off_t offset = entry->getFileOffset(); 1164 if (fseek(mZipFp, offset, SEEK_SET) != 0) { 1165 goto bail; 1166 } 1167 1168 switch (entry->getCompressionMethod()) 1169 { 1170 case ZipEntry::kCompressStored: { 1171 ssize_t amt = fread(buf, 1, unlen, mZipFp); 1172 if (amt != (ssize_t)unlen) { 1173 goto bail; 1174 } 1175 #if 0 1176 printf("data...\n"); 1177 const unsigned char* p = (unsigned char*)buf; 1178 const unsigned char* end = p+unlen; 1179 for (int i=0; i<32 && p < end; i++) { 1180 printf("0x%08x ", (int)(offset+(i*0x10))); 1181 for (int j=0; j<0x10 && p < end; j++) { 1182 printf(" %02x", *p); 1183 p++; 1184 } 1185 printf("\n"); 1186 } 1187 #endif 1188 1189 } 1190 break; 1191 case ZipEntry::kCompressDeflated: { 1192 if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) { 1193 goto bail; 1194 } 1195 } 1196 break; 1197 default: 1198 goto bail; 1199 } 1200 return buf; 1201 1202 bail: 1203 free(buf); 1204 return NULL; 1205 } 1206 1207 1208 /* 1209 * =========================================================================== 1210 * ZipFile::EndOfCentralDir 1211 * =========================================================================== 1212 */ 1213 1214 /* 1215 * Read the end-of-central-dir fields. 1216 * 1217 * "buf" should be positioned at the EOCD signature, and should contain 1218 * the entire EOCD area including the comment. 1219 */ 1220 status_t ZipFile::EndOfCentralDir::readBuf(const unsigned char* buf, int len) 1221 { 1222 /* don't allow re-use */ 1223 assert(mComment == NULL); 1224 1225 if (len < kEOCDLen) { 1226 /* looks like ZIP file got truncated */ 1227 LOGD(" Zip EOCD: expected >= %d bytes, found %d\n", 1228 kEOCDLen, len); 1229 return INVALID_OPERATION; 1230 } 1231 1232 /* this should probably be an assert() */ 1233 if (ZipEntry::getLongLE(&buf[0x00]) != kSignature) 1234 return UNKNOWN_ERROR; 1235 1236 mDiskNumber = ZipEntry::getShortLE(&buf[0x04]); 1237 mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]); 1238 mNumEntries = ZipEntry::getShortLE(&buf[0x08]); 1239 mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]); 1240 mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]); 1241 mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]); 1242 mCommentLen = ZipEntry::getShortLE(&buf[0x14]); 1243 1244 // TODO: validate mCentralDirOffset 1245 1246 if (mCommentLen > 0) { 1247 if (kEOCDLen + mCommentLen > len) { 1248 LOGD("EOCD(%d) + comment(%d) exceeds len (%d)\n", 1249 kEOCDLen, mCommentLen, len); 1250 return UNKNOWN_ERROR; 1251 } 1252 mComment = new unsigned char[mCommentLen]; 1253 memcpy(mComment, buf + kEOCDLen, mCommentLen); 1254 } 1255 1256 return NO_ERROR; 1257 } 1258 1259 /* 1260 * Write an end-of-central-directory section. 1261 */ 1262 status_t ZipFile::EndOfCentralDir::write(FILE* fp) 1263 { 1264 unsigned char buf[kEOCDLen]; 1265 1266 ZipEntry::putLongLE(&buf[0x00], kSignature); 1267 ZipEntry::putShortLE(&buf[0x04], mDiskNumber); 1268 ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir); 1269 ZipEntry::putShortLE(&buf[0x08], mNumEntries); 1270 ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries); 1271 ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize); 1272 ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset); 1273 ZipEntry::putShortLE(&buf[0x14], mCommentLen); 1274 1275 if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen) 1276 return UNKNOWN_ERROR; 1277 if (mCommentLen > 0) { 1278 assert(mComment != NULL); 1279 if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen) 1280 return UNKNOWN_ERROR; 1281 } 1282 1283 return NO_ERROR; 1284 } 1285 1286 /* 1287 * Dump the contents of an EndOfCentralDir object. 1288 */ 1289 void ZipFile::EndOfCentralDir::dump(void) const 1290 { 1291 LOGD(" EndOfCentralDir contents:\n"); 1292 LOGD(" diskNum=%u diskWCD=%u numEnt=%u totalNumEnt=%u\n", 1293 mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries); 1294 LOGD(" centDirSize=%lu centDirOff=%lu commentLen=%u\n", 1295 mCentralDirSize, mCentralDirOffset, mCommentLen); 1296 } 1297 1298