1 /* 2 * Copyright 2006 The Android Open Source Project 3 * 4 * Simple Zip file support. 5 */ 6 #include "safe_iop.h" 7 #include "zlib.h" 8 9 #include <errno.h> 10 #include <fcntl.h> 11 #include <limits.h> 12 #include <stdint.h> // for uintptr_t 13 #include <stdlib.h> 14 #include <sys/stat.h> // for S_ISLNK() 15 #include <unistd.h> 16 17 #define LOG_TAG "minzip" 18 #include "Zip.h" 19 #include "Bits.h" 20 #include "Log.h" 21 #include "DirUtil.h" 22 23 #undef NDEBUG // do this after including Log.h 24 #include <assert.h> 25 26 #define SORT_ENTRIES 1 27 28 /* 29 * Offset and length constants (java.util.zip naming convention). 30 */ 31 enum { 32 CENSIG = 0x02014b50, // PK12 33 CENHDR = 46, 34 35 CENVEM = 4, 36 CENVER = 6, 37 CENFLG = 8, 38 CENHOW = 10, 39 CENTIM = 12, 40 CENCRC = 16, 41 CENSIZ = 20, 42 CENLEN = 24, 43 CENNAM = 28, 44 CENEXT = 30, 45 CENCOM = 32, 46 CENDSK = 34, 47 CENATT = 36, 48 CENATX = 38, 49 CENOFF = 42, 50 51 ENDSIG = 0x06054b50, // PK56 52 ENDHDR = 22, 53 54 ENDSUB = 8, 55 ENDTOT = 10, 56 ENDSIZ = 12, 57 ENDOFF = 16, 58 ENDCOM = 20, 59 60 EXTSIG = 0x08074b50, // PK78 61 EXTHDR = 16, 62 63 EXTCRC = 4, 64 EXTSIZ = 8, 65 EXTLEN = 12, 66 67 LOCSIG = 0x04034b50, // PK34 68 LOCHDR = 30, 69 70 LOCVER = 4, 71 LOCFLG = 6, 72 LOCHOW = 8, 73 LOCTIM = 10, 74 LOCCRC = 14, 75 LOCSIZ = 18, 76 LOCLEN = 22, 77 LOCNAM = 26, 78 LOCEXT = 28, 79 80 STORED = 0, 81 DEFLATED = 8, 82 83 CENVEM_UNIX = 3 << 8, // the high byte of CENVEM 84 }; 85 86 87 /* 88 * For debugging, dump the contents of a ZipEntry. 89 */ 90 #if 0 91 static void dumpEntry(const ZipEntry* pEntry) 92 { 93 LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName); 94 LOGI(" off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset, 95 pEntry->compLen, pEntry->uncompLen, pEntry->compression); 96 } 97 #endif 98 99 /* 100 * (This is a mzHashTableLookup callback.) 101 * 102 * Compare two ZipEntry structs, by name. 103 */ 104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2) 105 { 106 const ZipEntry* entry1 = (const ZipEntry*) ventry1; 107 const ZipEntry* entry2 = (const ZipEntry*) ventry2; 108 109 if (entry1->fileNameLen != entry2->fileNameLen) 110 return entry1->fileNameLen - entry2->fileNameLen; 111 return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen); 112 } 113 114 /* 115 * (This is a mzHashTableLookup callback.) 116 * 117 * find a ZipEntry struct by name. 118 */ 119 static int hashcmpZipName(const void* ventry, const void* vname) 120 { 121 const ZipEntry* entry = (const ZipEntry*) ventry; 122 const char* name = (const char*) vname; 123 unsigned int nameLen = strlen(name); 124 125 if (entry->fileNameLen != nameLen) 126 return entry->fileNameLen - nameLen; 127 return memcmp(entry->fileName, name, nameLen); 128 } 129 130 /* 131 * Compute the hash code for a ZipEntry filename. 132 * 133 * Not expected to be compatible with any other hash function, so we init 134 * to 2 to ensure it doesn't happen to match. 135 */ 136 static unsigned int computeHash(const char* name, int nameLen) 137 { 138 unsigned int hash = 2; 139 140 while (nameLen--) 141 hash = hash * 31 + *name++; 142 143 return hash; 144 } 145 146 static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry) 147 { 148 unsigned int itemHash = computeHash(pEntry->fileName, pEntry->fileNameLen); 149 const ZipEntry* found; 150 151 found = (const ZipEntry*)mzHashTableLookup(pHash, 152 itemHash, pEntry, hashcmpZipEntry, true); 153 if (found != pEntry) { 154 LOGW("WARNING: duplicate entry '%.*s' in Zip\n", 155 found->fileNameLen, found->fileName); 156 /* keep going */ 157 } 158 } 159 160 static int validFilename(const char *fileName, unsigned int fileNameLen) 161 { 162 // Forbid super long filenames. 163 if (fileNameLen >= PATH_MAX) { 164 LOGW("Filename too long (%d chatacters)\n", fileNameLen); 165 return 0; 166 } 167 168 // Require all characters to be printable ASCII (no NUL, no UTF-8, etc). 169 unsigned int i; 170 for (i = 0; i < fileNameLen; ++i) { 171 if (fileName[i] < 32 || fileName[i] >= 127) { 172 LOGW("Filename contains invalid character '\%03o'\n", fileName[i]); 173 return 0; 174 } 175 } 176 177 return 1; 178 } 179 180 /* 181 * Parse the contents of a Zip archive. After confirming that the file 182 * is in fact a Zip, we scan out the contents of the central directory and 183 * store it in a hash table. 184 * 185 * Returns "true" on success. 186 */ 187 static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) 188 { 189 bool result = false; 190 const unsigned char* ptr; 191 unsigned int i, numEntries, cdOffset; 192 unsigned int val; 193 194 /* 195 * The first 4 bytes of the file will either be the local header 196 * signature for the first file (LOCSIG) or, if the archive doesn't 197 * have any files in it, the end-of-central-directory signature (ENDSIG). 198 */ 199 val = get4LE(pMap->addr); 200 if (val == ENDSIG) { 201 LOGI("Found Zip archive, but it looks empty\n"); 202 goto bail; 203 } else if (val != LOCSIG) { 204 LOGV("Not a Zip archive (found 0x%08x)\n", val); 205 goto bail; 206 } 207 208 /* 209 * Find the EOCD. We'll find it immediately unless they have a file 210 * comment. 211 */ 212 ptr = pMap->addr + pMap->length - ENDHDR; 213 214 while (ptr >= (const unsigned char*) pMap->addr) { 215 if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG) 216 break; 217 ptr--; 218 } 219 if (ptr < (const unsigned char*) pMap->addr) { 220 LOGI("Could not find end-of-central-directory in Zip\n"); 221 goto bail; 222 } 223 224 /* 225 * There are two interesting items in the EOCD block: the number of 226 * entries in the file, and the file offset of the start of the 227 * central directory. 228 */ 229 numEntries = get2LE(ptr + ENDSUB); 230 cdOffset = get4LE(ptr + ENDOFF); 231 232 LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 233 if (numEntries == 0 || cdOffset >= pMap->length) { 234 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 235 numEntries, cdOffset, pMap->length); 236 goto bail; 237 } 238 239 /* 240 * Create data structures to hold entries. 241 */ 242 pArchive->numEntries = numEntries; 243 pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry)); 244 pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL); 245 if (pArchive->pEntries == NULL || pArchive->pHash == NULL) 246 goto bail; 247 248 ptr = pMap->addr + cdOffset; 249 for (i = 0; i < numEntries; i++) { 250 ZipEntry* pEntry; 251 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 252 const unsigned char* localHdr; 253 const char *fileName; 254 255 if (ptr + CENHDR > (const unsigned char*)pMap->addr + pMap->length) { 256 LOGW("Ran off the end (at %d)\n", i); 257 goto bail; 258 } 259 if (get4LE(ptr) != CENSIG) { 260 LOGW("Missed a central dir sig (at %d)\n", i); 261 goto bail; 262 } 263 264 localHdrOffset = get4LE(ptr + CENOFF); 265 fileNameLen = get2LE(ptr + CENNAM); 266 extraLen = get2LE(ptr + CENEXT); 267 commentLen = get2LE(ptr + CENCOM); 268 fileName = (const char*)ptr + CENHDR; 269 if (fileName + fileNameLen > (const char*)pMap->addr + pMap->length) { 270 LOGW("Filename ran off the end (at %d)\n", i); 271 goto bail; 272 } 273 if (!validFilename(fileName, fileNameLen)) { 274 LOGW("Invalid filename (at %d)\n", i); 275 goto bail; 276 } 277 278 #if SORT_ENTRIES 279 /* Figure out where this entry should go (binary search). 280 */ 281 if (i > 0) { 282 int low, high; 283 284 low = 0; 285 high = i - 1; 286 while (low <= high) { 287 int mid; 288 int diff; 289 int diffLen; 290 291 mid = low + ((high - low) / 2); // avoid overflow 292 293 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) { 294 diffLen = pArchive->pEntries[mid].fileNameLen; 295 } else { 296 diffLen = fileNameLen; 297 } 298 diff = strncmp(pArchive->pEntries[mid].fileName, fileName, 299 diffLen); 300 if (diff == 0) { 301 diff = pArchive->pEntries[mid].fileNameLen - fileNameLen; 302 } 303 if (diff < 0) { 304 low = mid + 1; 305 } else if (diff > 0) { 306 high = mid - 1; 307 } else { 308 high = mid; 309 break; 310 } 311 } 312 313 unsigned int target = high + 1; 314 assert(target <= i); 315 if (target != i) { 316 /* It belongs somewhere other than at the end of 317 * the list. Make some room at [target]. 318 */ 319 memmove(pArchive->pEntries + target + 1, 320 pArchive->pEntries + target, 321 (i - target) * sizeof(ZipEntry)); 322 } 323 pEntry = &pArchive->pEntries[target]; 324 } else { 325 pEntry = &pArchive->pEntries[0]; 326 } 327 #else 328 pEntry = &pArchive->pEntries[i]; 329 #endif 330 331 //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n", 332 // i, localHdrOffset, fileNameLen, extraLen, commentLen); 333 334 pEntry->fileNameLen = fileNameLen; 335 pEntry->fileName = fileName; 336 337 pEntry->compLen = get4LE(ptr + CENSIZ); 338 pEntry->uncompLen = get4LE(ptr + CENLEN); 339 pEntry->compression = get2LE(ptr + CENHOW); 340 pEntry->modTime = get4LE(ptr + CENTIM); 341 pEntry->crc32 = get4LE(ptr + CENCRC); 342 343 /* These two are necessary for finding the mode of the file. 344 */ 345 pEntry->versionMadeBy = get2LE(ptr + CENVEM); 346 if ((pEntry->versionMadeBy & 0xff00) != 0 && 347 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX) 348 { 349 LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n", 350 pEntry->versionMadeBy >> 8, i); 351 goto bail; 352 } 353 pEntry->externalFileAttributes = get4LE(ptr + CENATX); 354 355 // Perform pMap->addr + localHdrOffset, ensuring that it won't 356 // overflow. This is needed because localHdrOffset is untrusted. 357 if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pMap->addr, 358 (uintptr_t)localHdrOffset)) { 359 LOGW("Integer overflow adding in parseZipArchive\n"); 360 goto bail; 361 } 362 if ((uintptr_t)localHdr + LOCHDR > 363 (uintptr_t)pMap->addr + pMap->length) { 364 LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i); 365 goto bail; 366 } 367 if (get4LE(localHdr) != LOCSIG) { 368 LOGW("Missed a local header sig (at %d)\n", i); 369 goto bail; 370 } 371 pEntry->offset = localHdrOffset + LOCHDR 372 + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT); 373 if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) { 374 LOGW("Integer overflow adding in parseZipArchive\n"); 375 goto bail; 376 } 377 if ((size_t)pEntry->offset + pEntry->compLen > pMap->length) { 378 LOGW("Data ran off the end (at %d)\n", i); 379 goto bail; 380 } 381 382 #if !SORT_ENTRIES 383 /* Add to hash table; no need to lock here. 384 * Can't do this now if we're sorting, because entries 385 * will move around. 386 */ 387 addEntryToHashTable(pArchive->pHash, pEntry); 388 #endif 389 390 //dumpEntry(pEntry); 391 ptr += CENHDR + fileNameLen + extraLen + commentLen; 392 } 393 394 #if SORT_ENTRIES 395 /* If we're sorting, we have to wait until all entries 396 * are in their final places, otherwise the pointers will 397 * probably point to the wrong things. 398 */ 399 for (i = 0; i < numEntries; i++) { 400 /* Add to hash table; no need to lock here. 401 */ 402 addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]); 403 } 404 #endif 405 406 result = true; 407 408 bail: 409 if (!result) { 410 mzHashTableFree(pArchive->pHash); 411 pArchive->pHash = NULL; 412 } 413 return result; 414 } 415 416 /* 417 * Open a Zip archive and scan out the contents. 418 * 419 * The easiest way to do this is to mmap() the whole thing and do the 420 * traditional backward scan for central directory. Since the EOCD is 421 * a relatively small bit at the end, we should end up only touching a 422 * small set of pages. 423 * 424 * This will be called on non-Zip files, especially during startup, so 425 * we don't want to be too noisy about failures. (Do we want a "quiet" 426 * flag?) 427 * 428 * On success, we fill out the contents of "pArchive". 429 */ 430 int mzOpenZipArchive(const char* fileName, ZipArchive* pArchive) 431 { 432 MemMapping map; 433 int err; 434 435 LOGV("Opening archive '%s' %p\n", fileName, pArchive); 436 437 map.addr = NULL; 438 memset(pArchive, 0, sizeof(*pArchive)); 439 440 pArchive->fd = open(fileName, O_RDONLY, 0); 441 if (pArchive->fd < 0) { 442 err = errno ? errno : -1; 443 LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); 444 goto bail; 445 } 446 447 if (sysMapFileInShmem(pArchive->fd, &map) != 0) { 448 err = -1; 449 LOGW("Map of '%s' failed\n", fileName); 450 goto bail; 451 } 452 453 if (map.length < ENDHDR) { 454 err = -1; 455 LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length); 456 goto bail; 457 } 458 459 if (!parseZipArchive(pArchive, &map)) { 460 err = -1; 461 LOGV("Parsing '%s' failed\n", fileName); 462 goto bail; 463 } 464 465 err = 0; 466 sysCopyMap(&pArchive->map, &map); 467 map.addr = NULL; 468 469 bail: 470 if (err != 0) 471 mzCloseZipArchive(pArchive); 472 if (map.addr != NULL) 473 sysReleaseShmem(&map); 474 return err; 475 } 476 477 /* 478 * Close a ZipArchive, closing the file and freeing the contents. 479 * 480 * NOTE: the ZipArchive may not have been fully created. 481 */ 482 void mzCloseZipArchive(ZipArchive* pArchive) 483 { 484 LOGV("Closing archive %p\n", pArchive); 485 486 if (pArchive->fd >= 0) 487 close(pArchive->fd); 488 if (pArchive->map.addr != NULL) 489 sysReleaseShmem(&pArchive->map); 490 491 free(pArchive->pEntries); 492 493 mzHashTableFree(pArchive->pHash); 494 495 pArchive->fd = -1; 496 pArchive->pHash = NULL; 497 pArchive->pEntries = NULL; 498 } 499 500 /* 501 * Find a matching entry. 502 * 503 * Returns NULL if no matching entry found. 504 */ 505 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive, 506 const char* entryName) 507 { 508 unsigned int itemHash = computeHash(entryName, strlen(entryName)); 509 510 return (const ZipEntry*)mzHashTableLookup(pArchive->pHash, 511 itemHash, (char*) entryName, hashcmpZipName, false); 512 } 513 514 /* 515 * Return true if the entry is a symbolic link. 516 */ 517 bool mzIsZipEntrySymlink(const ZipEntry* pEntry) 518 { 519 if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) { 520 return S_ISLNK(pEntry->externalFileAttributes >> 16); 521 } 522 return false; 523 } 524 525 /* Call processFunction on the uncompressed data of a STORED entry. 526 */ 527 static bool processStoredEntry(const ZipArchive *pArchive, 528 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 529 void *cookie) 530 { 531 size_t bytesLeft = pEntry->compLen; 532 while (bytesLeft > 0) { 533 unsigned char buf[32 * 1024]; 534 ssize_t n; 535 size_t count; 536 bool ret; 537 538 count = bytesLeft; 539 if (count > sizeof(buf)) { 540 count = sizeof(buf); 541 } 542 n = read(pArchive->fd, buf, count); 543 if (n < 0 || (size_t)n != count) { 544 LOGE("Can't read %zu bytes from zip file: %ld\n", count, n); 545 return false; 546 } 547 ret = processFunction(buf, n, cookie); 548 if (!ret) { 549 return false; 550 } 551 bytesLeft -= count; 552 } 553 return true; 554 } 555 556 static bool processDeflatedEntry(const ZipArchive *pArchive, 557 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 558 void *cookie) 559 { 560 long result = -1; 561 unsigned char readBuf[32 * 1024]; 562 unsigned char procBuf[32 * 1024]; 563 z_stream zstream; 564 int zerr; 565 long compRemaining; 566 567 compRemaining = pEntry->compLen; 568 569 /* 570 * Initialize the zlib stream. 571 */ 572 memset(&zstream, 0, sizeof(zstream)); 573 zstream.zalloc = Z_NULL; 574 zstream.zfree = Z_NULL; 575 zstream.opaque = Z_NULL; 576 zstream.next_in = NULL; 577 zstream.avail_in = 0; 578 zstream.next_out = (Bytef*) procBuf; 579 zstream.avail_out = sizeof(procBuf); 580 zstream.data_type = Z_UNKNOWN; 581 582 /* 583 * Use the undocumented "negative window bits" feature to tell zlib 584 * that there's no zlib header waiting for it. 585 */ 586 zerr = inflateInit2(&zstream, -MAX_WBITS); 587 if (zerr != Z_OK) { 588 if (zerr == Z_VERSION_ERROR) { 589 LOGE("Installed zlib is not compatible with linked version (%s)\n", 590 ZLIB_VERSION); 591 } else { 592 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 593 } 594 goto bail; 595 } 596 597 /* 598 * Loop while we have data. 599 */ 600 do { 601 /* read as much as we can */ 602 if (zstream.avail_in == 0) { 603 long getSize = (compRemaining > (long)sizeof(readBuf)) ? 604 (long)sizeof(readBuf) : compRemaining; 605 LOGVV("+++ reading %ld bytes (%ld left)\n", 606 getSize, compRemaining); 607 608 int cc = read(pArchive->fd, readBuf, getSize); 609 if (cc != (int) getSize) { 610 LOGW("inflate read failed (%d vs %ld)\n", cc, getSize); 611 goto z_bail; 612 } 613 614 compRemaining -= getSize; 615 616 zstream.next_in = readBuf; 617 zstream.avail_in = getSize; 618 } 619 620 /* uncompress the data */ 621 zerr = inflate(&zstream, Z_NO_FLUSH); 622 if (zerr != Z_OK && zerr != Z_STREAM_END) { 623 LOGD("zlib inflate call failed (zerr=%d)\n", zerr); 624 goto z_bail; 625 } 626 627 /* write when we're full or when we're done */ 628 if (zstream.avail_out == 0 || 629 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf))) 630 { 631 long procSize = zstream.next_out - procBuf; 632 LOGVV("+++ processing %d bytes\n", (int) procSize); 633 bool ret = processFunction(procBuf, procSize, cookie); 634 if (!ret) { 635 LOGW("Process function elected to fail (in inflate)\n"); 636 goto z_bail; 637 } 638 639 zstream.next_out = procBuf; 640 zstream.avail_out = sizeof(procBuf); 641 } 642 } while (zerr == Z_OK); 643 644 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 645 646 // success! 647 result = zstream.total_out; 648 649 z_bail: 650 inflateEnd(&zstream); /* free up any allocated structures */ 651 652 bail: 653 if (result != pEntry->uncompLen) { 654 if (result != -1) // error already shown? 655 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 656 result, pEntry->uncompLen); 657 return false; 658 } 659 return true; 660 } 661 662 /* 663 * Stream the uncompressed data through the supplied function, 664 * passing cookie to it each time it gets called. processFunction 665 * may be called more than once. 666 * 667 * If processFunction returns false, the operation is abandoned and 668 * mzProcessZipEntryContents() immediately returns false. 669 * 670 * This is useful for calculating the hash of an entry's uncompressed contents. 671 */ 672 bool mzProcessZipEntryContents(const ZipArchive *pArchive, 673 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 674 void *cookie) 675 { 676 bool ret = false; 677 off_t oldOff; 678 679 /* save current offset */ 680 oldOff = lseek(pArchive->fd, 0, SEEK_CUR); 681 682 /* Seek to the beginning of the entry's compressed data. */ 683 lseek(pArchive->fd, pEntry->offset, SEEK_SET); 684 685 switch (pEntry->compression) { 686 case STORED: 687 ret = processStoredEntry(pArchive, pEntry, processFunction, cookie); 688 break; 689 case DEFLATED: 690 ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie); 691 break; 692 default: 693 LOGE("Unsupported compression type %d for entry '%s'\n", 694 pEntry->compression, pEntry->fileName); 695 break; 696 } 697 698 /* restore file offset */ 699 lseek(pArchive->fd, oldOff, SEEK_SET); 700 return ret; 701 } 702 703 static bool crcProcessFunction(const unsigned char *data, int dataLen, 704 void *crc) 705 { 706 *(unsigned long *)crc = crc32(*(unsigned long *)crc, data, dataLen); 707 return true; 708 } 709 710 /* 711 * Check the CRC on this entry; return true if it is correct. 712 * May do other internal checks as well. 713 */ 714 bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry) 715 { 716 unsigned long crc; 717 bool ret; 718 719 crc = crc32(0L, Z_NULL, 0); 720 ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction, 721 (void *)&crc); 722 if (!ret) { 723 LOGE("Can't calculate CRC for entry\n"); 724 return false; 725 } 726 if (crc != (unsigned long)pEntry->crc32) { 727 LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n", 728 pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32); 729 return false; 730 } 731 return true; 732 } 733 734 typedef struct { 735 char *buf; 736 int bufLen; 737 } CopyProcessArgs; 738 739 static bool copyProcessFunction(const unsigned char *data, int dataLen, 740 void *cookie) 741 { 742 CopyProcessArgs *args = (CopyProcessArgs *)cookie; 743 if (dataLen <= args->bufLen) { 744 memcpy(args->buf, data, dataLen); 745 args->buf += dataLen; 746 args->bufLen -= dataLen; 747 return true; 748 } 749 return false; 750 } 751 752 /* 753 * Read an entry into a buffer allocated by the caller. 754 */ 755 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry, 756 char *buf, int bufLen) 757 { 758 CopyProcessArgs args; 759 bool ret; 760 761 args.buf = buf; 762 args.bufLen = bufLen; 763 ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction, 764 (void *)&args); 765 if (!ret) { 766 LOGE("Can't extract entry to buffer.\n"); 767 return false; 768 } 769 return true; 770 } 771 772 static bool writeProcessFunction(const unsigned char *data, int dataLen, 773 void *cookie) 774 { 775 int fd = (int)cookie; 776 777 ssize_t soFar = 0; 778 while (true) { 779 ssize_t n = write(fd, data+soFar, dataLen-soFar); 780 if (n <= 0) { 781 LOGE("Error writing %ld bytes from zip file from %p: %s\n", 782 dataLen-soFar, data+soFar, strerror(errno)); 783 if (errno != EINTR) { 784 return false; 785 } 786 } else if (n > 0) { 787 soFar += n; 788 if (soFar == dataLen) return true; 789 if (soFar > dataLen) { 790 LOGE("write overrun? (%ld bytes instead of %d)\n", 791 soFar, dataLen); 792 return false; 793 } 794 } 795 } 796 } 797 798 /* 799 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset. 800 */ 801 bool mzExtractZipEntryToFile(const ZipArchive *pArchive, 802 const ZipEntry *pEntry, int fd) 803 { 804 bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction, 805 (void*)fd); 806 if (!ret) { 807 LOGE("Can't extract entry to file.\n"); 808 return false; 809 } 810 return true; 811 } 812 813 typedef struct { 814 unsigned char* buffer; 815 long len; 816 } BufferExtractCookie; 817 818 static bool bufferProcessFunction(const unsigned char *data, int dataLen, 819 void *cookie) { 820 BufferExtractCookie *bec = (BufferExtractCookie*)cookie; 821 822 memmove(bec->buffer, data, dataLen); 823 bec->buffer += dataLen; 824 bec->len -= dataLen; 825 826 return true; 827 } 828 829 /* 830 * Uncompress "pEntry" in "pArchive" to buffer, which must be large 831 * enough to hold mzGetZipEntryUncomplen(pEntry) bytes. 832 */ 833 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive, 834 const ZipEntry *pEntry, unsigned char *buffer) 835 { 836 BufferExtractCookie bec; 837 bec.buffer = buffer; 838 bec.len = mzGetZipEntryUncompLen(pEntry); 839 840 bool ret = mzProcessZipEntryContents(pArchive, pEntry, 841 bufferProcessFunction, (void*)&bec); 842 if (!ret || bec.len != 0) { 843 LOGE("Can't extract entry to memory buffer.\n"); 844 return false; 845 } 846 return true; 847 } 848 849 850 /* Helper state to make path translation easier and less malloc-happy. 851 */ 852 typedef struct { 853 const char *targetDir; 854 const char *zipDir; 855 char *buf; 856 int targetDirLen; 857 int zipDirLen; 858 int bufLen; 859 } MzPathHelper; 860 861 /* Given the values of targetDir and zipDir in the helper, 862 * return the target filename of the provided entry. 863 * The helper must be initialized first. 864 */ 865 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry) 866 { 867 int needLen; 868 bool firstTime = (helper->buf == NULL); 869 870 /* target file <-- targetDir + / + entry[zipDirLen:] 871 */ 872 needLen = helper->targetDirLen + 1 + 873 pEntry->fileNameLen - helper->zipDirLen + 1; 874 if (needLen > helper->bufLen) { 875 char *newBuf; 876 877 needLen *= 2; 878 newBuf = (char *)realloc(helper->buf, needLen); 879 if (newBuf == NULL) { 880 return NULL; 881 } 882 helper->buf = newBuf; 883 helper->bufLen = needLen; 884 } 885 886 /* Every path will start with the target path and a slash. 887 */ 888 if (firstTime) { 889 char *p = helper->buf; 890 memcpy(p, helper->targetDir, helper->targetDirLen); 891 p += helper->targetDirLen; 892 if (p == helper->buf || p[-1] != '/') { 893 helper->targetDirLen += 1; 894 *p++ = '/'; 895 } 896 } 897 898 /* Replace the custom part of the path with the appropriate 899 * part of the entry's path. 900 */ 901 char *epath = helper->buf + helper->targetDirLen; 902 memcpy(epath, pEntry->fileName + helper->zipDirLen, 903 pEntry->fileNameLen - helper->zipDirLen); 904 epath += pEntry->fileNameLen - helper->zipDirLen; 905 *epath = '\0'; 906 907 return helper->buf; 908 } 909 910 /* 911 * Inflate all entries under zipDir to the directory specified by 912 * targetDir, which must exist and be a writable directory. 913 * 914 * The immediate children of zipDir will become the immediate 915 * children of targetDir; e.g., if the archive contains the entries 916 * 917 * a/b/c/one 918 * a/b/c/two 919 * a/b/c/d/three 920 * 921 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting 922 * files will be 923 * 924 * /tmp/one 925 * /tmp/two 926 * /tmp/d/three 927 * 928 * Returns true on success, false on failure. 929 */ 930 bool mzExtractRecursive(const ZipArchive *pArchive, 931 const char *zipDir, const char *targetDir, 932 int flags, const struct utimbuf *timestamp, 933 void (*callback)(const char *fn, void *), void *cookie) 934 { 935 if (zipDir[0] == '/') { 936 LOGE("mzExtractRecursive(): zipDir must be a relative path.\n"); 937 return false; 938 } 939 if (targetDir[0] != '/') { 940 LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n"); 941 return false; 942 } 943 944 unsigned int zipDirLen; 945 char *zpath; 946 947 zipDirLen = strlen(zipDir); 948 zpath = (char *)malloc(zipDirLen + 2); 949 if (zpath == NULL) { 950 LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2); 951 return false; 952 } 953 /* If zipDir is empty, we'll extract the entire zip file. 954 * Otherwise, canonicalize the path. 955 */ 956 if (zipDirLen > 0) { 957 /* Make sure there's (hopefully, exactly one) slash at the 958 * end of the path. This way we don't need to worry about 959 * accidentally extracting "one/twothree" when a path like 960 * "one/two" is specified. 961 */ 962 memcpy(zpath, zipDir, zipDirLen); 963 if (zpath[zipDirLen-1] != '/') { 964 zpath[zipDirLen++] = '/'; 965 } 966 } 967 zpath[zipDirLen] = '\0'; 968 969 /* Set up the helper structure that we'll use to assemble paths. 970 */ 971 MzPathHelper helper; 972 helper.targetDir = targetDir; 973 helper.targetDirLen = strlen(helper.targetDir); 974 helper.zipDir = zpath; 975 helper.zipDirLen = strlen(helper.zipDir); 976 helper.buf = NULL; 977 helper.bufLen = 0; 978 979 /* Walk through the entries and extract anything whose path begins 980 * with zpath. 981 //TODO: since the entries are sorted, binary search for the first match 982 // and stop after the first non-match. 983 */ 984 unsigned int i; 985 bool seenMatch = false; 986 int ok = true; 987 for (i = 0; i < pArchive->numEntries; i++) { 988 ZipEntry *pEntry = pArchive->pEntries + i; 989 if (pEntry->fileNameLen < zipDirLen) { 990 //TODO: look out for a single empty directory entry that matches zpath, but 991 // missing the trailing slash. Most zip files seem to include 992 // the trailing slash, but I think it's legal to leave it off. 993 // e.g., zpath "a/b/", entry "a/b", with no children of the entry. 994 /* No chance of matching. 995 */ 996 #if SORT_ENTRIES 997 if (seenMatch) { 998 /* Since the entries are sorted, we can give up 999 * on the first mismatch after the first match. 1000 */ 1001 break; 1002 } 1003 #endif 1004 continue; 1005 } 1006 /* If zpath is empty, this strncmp() will match everything, 1007 * which is what we want. 1008 */ 1009 if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) { 1010 #if SORT_ENTRIES 1011 if (seenMatch) { 1012 /* Since the entries are sorted, we can give up 1013 * on the first mismatch after the first match. 1014 */ 1015 break; 1016 } 1017 #endif 1018 continue; 1019 } 1020 /* This entry begins with zipDir, so we'll extract it. 1021 */ 1022 seenMatch = true; 1023 1024 /* Find the target location of the entry. 1025 */ 1026 const char *targetFile = targetEntryPath(&helper, pEntry); 1027 if (targetFile == NULL) { 1028 LOGE("Can't assemble target path for \"%.*s\"\n", 1029 pEntry->fileNameLen, pEntry->fileName); 1030 ok = false; 1031 break; 1032 } 1033 1034 /* With DRY_RUN set, invoke the callback but don't do anything else. 1035 */ 1036 if (flags & MZ_EXTRACT_DRY_RUN) { 1037 if (callback != NULL) callback(targetFile, cookie); 1038 continue; 1039 } 1040 1041 /* Create the file or directory. 1042 */ 1043 #define UNZIP_DIRMODE 0755 1044 #define UNZIP_FILEMODE 0644 1045 if (pEntry->fileName[pEntry->fileNameLen-1] == '/') { 1046 if (!(flags & MZ_EXTRACT_FILES_ONLY)) { 1047 int ret = dirCreateHierarchy( 1048 targetFile, UNZIP_DIRMODE, timestamp, false); 1049 if (ret != 0) { 1050 LOGE("Can't create containing directory for \"%s\": %s\n", 1051 targetFile, strerror(errno)); 1052 ok = false; 1053 break; 1054 } 1055 LOGD("Extracted dir \"%s\"\n", targetFile); 1056 } 1057 } else { 1058 /* This is not a directory. First, make sure that 1059 * the containing directory exists. 1060 */ 1061 int ret = dirCreateHierarchy( 1062 targetFile, UNZIP_DIRMODE, timestamp, true); 1063 if (ret != 0) { 1064 LOGE("Can't create containing directory for \"%s\": %s\n", 1065 targetFile, strerror(errno)); 1066 ok = false; 1067 break; 1068 } 1069 1070 /* With FILES_ONLY set, we need to ignore metadata entirely, 1071 * so treat symlinks as regular files. 1072 */ 1073 if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) { 1074 /* The entry is a symbolic link. 1075 * The relative target of the symlink is in the 1076 * data section of this entry. 1077 */ 1078 if (pEntry->uncompLen == 0) { 1079 LOGE("Symlink entry \"%s\" has no target\n", 1080 targetFile); 1081 ok = false; 1082 break; 1083 } 1084 char *linkTarget = malloc(pEntry->uncompLen + 1); 1085 if (linkTarget == NULL) { 1086 ok = false; 1087 break; 1088 } 1089 ok = mzReadZipEntry(pArchive, pEntry, linkTarget, 1090 pEntry->uncompLen); 1091 if (!ok) { 1092 LOGE("Can't read symlink target for \"%s\"\n", 1093 targetFile); 1094 free(linkTarget); 1095 break; 1096 } 1097 linkTarget[pEntry->uncompLen] = '\0'; 1098 1099 /* Make the link. 1100 */ 1101 ret = symlink(linkTarget, targetFile); 1102 if (ret != 0) { 1103 LOGE("Can't symlink \"%s\" to \"%s\": %s\n", 1104 targetFile, linkTarget, strerror(errno)); 1105 free(linkTarget); 1106 ok = false; 1107 break; 1108 } 1109 LOGD("Extracted symlink \"%s\" -> \"%s\"\n", 1110 targetFile, linkTarget); 1111 free(linkTarget); 1112 } else { 1113 /* The entry is a regular file. 1114 * Open the target for writing. 1115 */ 1116 int fd = creat(targetFile, UNZIP_FILEMODE); 1117 if (fd < 0) { 1118 LOGE("Can't create target file \"%s\": %s\n", 1119 targetFile, strerror(errno)); 1120 ok = false; 1121 break; 1122 } 1123 1124 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd); 1125 close(fd); 1126 if (!ok) { 1127 LOGE("Error extracting \"%s\"\n", targetFile); 1128 ok = false; 1129 break; 1130 } 1131 1132 if (timestamp != NULL && utime(targetFile, timestamp)) { 1133 LOGE("Error touching \"%s\"\n", targetFile); 1134 ok = false; 1135 break; 1136 } 1137 1138 LOGD("Extracted file \"%s\"\n", targetFile); 1139 } 1140 } 1141 1142 if (callback != NULL) callback(targetFile, cookie); 1143 } 1144 1145 free(helper.buf); 1146 free(zpath); 1147 1148 return ok; 1149 } 1150