1 /* 2 * Copyright 2006 The Android Open Source Project 3 * 4 * Simple Zip file support. 5 */ 6 #include "safe_iop.h" 7 #include "zlib.h" 8 9 #include <errno.h> 10 #include <fcntl.h> 11 #include <limits.h> 12 #include <stdint.h> // for uintptr_t 13 #include <stdlib.h> 14 #include <sys/stat.h> // for S_ISLNK() 15 #include <unistd.h> 16 17 #define LOG_TAG "minzip" 18 #include "Zip.h" 19 #include "Bits.h" 20 #include "Log.h" 21 #include "DirUtil.h" 22 23 #undef NDEBUG // do this after including Log.h 24 #include <assert.h> 25 26 #define SORT_ENTRIES 1 27 28 /* 29 * Offset and length constants (java.util.zip naming convention). 30 */ 31 enum { 32 CENSIG = 0x02014b50, // PK12 33 CENHDR = 46, 34 35 CENVEM = 4, 36 CENVER = 6, 37 CENFLG = 8, 38 CENHOW = 10, 39 CENTIM = 12, 40 CENCRC = 16, 41 CENSIZ = 20, 42 CENLEN = 24, 43 CENNAM = 28, 44 CENEXT = 30, 45 CENCOM = 32, 46 CENDSK = 34, 47 CENATT = 36, 48 CENATX = 38, 49 CENOFF = 42, 50 51 ENDSIG = 0x06054b50, // PK56 52 ENDHDR = 22, 53 54 ENDSUB = 8, 55 ENDTOT = 10, 56 ENDSIZ = 12, 57 ENDOFF = 16, 58 ENDCOM = 20, 59 60 EXTSIG = 0x08074b50, // PK78 61 EXTHDR = 16, 62 63 EXTCRC = 4, 64 EXTSIZ = 8, 65 EXTLEN = 12, 66 67 LOCSIG = 0x04034b50, // PK34 68 LOCHDR = 30, 69 70 LOCVER = 4, 71 LOCFLG = 6, 72 LOCHOW = 8, 73 LOCTIM = 10, 74 LOCCRC = 14, 75 LOCSIZ = 18, 76 LOCLEN = 22, 77 LOCNAM = 26, 78 LOCEXT = 28, 79 80 STORED = 0, 81 DEFLATED = 8, 82 83 CENVEM_UNIX = 3 << 8, // the high byte of CENVEM 84 }; 85 86 87 /* 88 * For debugging, dump the contents of a ZipEntry. 89 */ 90 #if 0 91 static void dumpEntry(const ZipEntry* pEntry) 92 { 93 LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName); 94 LOGI(" off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset, 95 pEntry->compLen, pEntry->uncompLen, pEntry->compression); 96 } 97 #endif 98 99 /* 100 * (This is a mzHashTableLookup callback.) 101 * 102 * Compare two ZipEntry structs, by name. 103 */ 104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2) 105 { 106 const ZipEntry* entry1 = (const ZipEntry*) ventry1; 107 const ZipEntry* entry2 = (const ZipEntry*) ventry2; 108 109 if (entry1->fileNameLen != entry2->fileNameLen) 110 return entry1->fileNameLen - entry2->fileNameLen; 111 return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen); 112 } 113 114 /* 115 * (This is a mzHashTableLookup callback.) 116 * 117 * find a ZipEntry struct by name. 118 */ 119 static int hashcmpZipName(const void* ventry, const void* vname) 120 { 121 const ZipEntry* entry = (const ZipEntry*) ventry; 122 const char* name = (const char*) vname; 123 unsigned int nameLen = strlen(name); 124 125 if (entry->fileNameLen != nameLen) 126 return entry->fileNameLen - nameLen; 127 return memcmp(entry->fileName, name, nameLen); 128 } 129 130 /* 131 * Compute the hash code for a ZipEntry filename. 132 * 133 * Not expected to be compatible with any other hash function, so we init 134 * to 2 to ensure it doesn't happen to match. 135 */ 136 static unsigned int computeHash(const char* name, int nameLen) 137 { 138 unsigned int hash = 2; 139 140 while (nameLen--) 141 hash = hash * 31 + *name++; 142 143 return hash; 144 } 145 146 static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry) 147 { 148 unsigned int itemHash = computeHash(pEntry->fileName, pEntry->fileNameLen); 149 const ZipEntry* found; 150 151 found = (const ZipEntry*)mzHashTableLookup(pHash, 152 itemHash, pEntry, hashcmpZipEntry, true); 153 if (found != pEntry) { 154 LOGW("WARNING: duplicate entry '%.*s' in Zip\n", 155 found->fileNameLen, found->fileName); 156 /* keep going */ 157 } 158 } 159 160 static int validFilename(const char *fileName, unsigned int fileNameLen) 161 { 162 // Forbid super long filenames. 163 if (fileNameLen >= PATH_MAX) { 164 LOGW("Filename too long (%d chatacters)\n", fileNameLen); 165 return 0; 166 } 167 168 // Require all characters to be printable ASCII (no NUL, no UTF-8, etc). 169 unsigned int i; 170 for (i = 0; i < fileNameLen; ++i) { 171 if (fileName[i] < 32 || fileName[i] >= 127) { 172 LOGW("Filename contains invalid character '\%03o'\n", fileName[i]); 173 return 0; 174 } 175 } 176 177 return 1; 178 } 179 180 /* 181 * Parse the contents of a Zip archive. After confirming that the file 182 * is in fact a Zip, we scan out the contents of the central directory and 183 * store it in a hash table. 184 * 185 * Returns "true" on success. 186 */ 187 static bool parseZipArchive(ZipArchive* pArchive) 188 { 189 bool result = false; 190 const unsigned char* ptr; 191 unsigned int i, numEntries, cdOffset; 192 unsigned int val; 193 194 /* 195 * The first 4 bytes of the file will either be the local header 196 * signature for the first file (LOCSIG) or, if the archive doesn't 197 * have any files in it, the end-of-central-directory signature (ENDSIG). 198 */ 199 val = get4LE(pArchive->addr); 200 if (val == ENDSIG) { 201 LOGI("Found Zip archive, but it looks empty\n"); 202 goto bail; 203 } else if (val != LOCSIG) { 204 LOGV("Not a Zip archive (found 0x%08x)\n", val); 205 goto bail; 206 } 207 208 /* 209 * Find the EOCD. We'll find it immediately unless they have a file 210 * comment. 211 */ 212 ptr = pArchive->addr + pArchive->length - ENDHDR; 213 214 while (ptr >= (const unsigned char*) pArchive->addr) { 215 if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG) 216 break; 217 ptr--; 218 } 219 if (ptr < (const unsigned char*) pArchive->addr) { 220 LOGI("Could not find end-of-central-directory in Zip\n"); 221 goto bail; 222 } 223 224 /* 225 * There are two interesting items in the EOCD block: the number of 226 * entries in the file, and the file offset of the start of the 227 * central directory. 228 */ 229 numEntries = get2LE(ptr + ENDSUB); 230 cdOffset = get4LE(ptr + ENDOFF); 231 232 LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 233 if (numEntries == 0 || cdOffset >= pArchive->length) { 234 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 235 numEntries, cdOffset, pArchive->length); 236 goto bail; 237 } 238 239 /* 240 * Create data structures to hold entries. 241 */ 242 pArchive->numEntries = numEntries; 243 pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry)); 244 pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL); 245 if (pArchive->pEntries == NULL || pArchive->pHash == NULL) 246 goto bail; 247 248 ptr = pArchive->addr + cdOffset; 249 for (i = 0; i < numEntries; i++) { 250 ZipEntry* pEntry; 251 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 252 const unsigned char* localHdr; 253 const char *fileName; 254 255 if (ptr + CENHDR > (const unsigned char*)pArchive->addr + pArchive->length) { 256 LOGW("Ran off the end (at %d)\n", i); 257 goto bail; 258 } 259 if (get4LE(ptr) != CENSIG) { 260 LOGW("Missed a central dir sig (at %d)\n", i); 261 goto bail; 262 } 263 264 localHdrOffset = get4LE(ptr + CENOFF); 265 fileNameLen = get2LE(ptr + CENNAM); 266 extraLen = get2LE(ptr + CENEXT); 267 commentLen = get2LE(ptr + CENCOM); 268 fileName = (const char*)ptr + CENHDR; 269 if (fileName + fileNameLen > (const char*)pArchive->addr + pArchive->length) { 270 LOGW("Filename ran off the end (at %d)\n", i); 271 goto bail; 272 } 273 if (!validFilename(fileName, fileNameLen)) { 274 LOGW("Invalid filename (at %d)\n", i); 275 goto bail; 276 } 277 278 #if SORT_ENTRIES 279 /* Figure out where this entry should go (binary search). 280 */ 281 if (i > 0) { 282 int low, high; 283 284 low = 0; 285 high = i - 1; 286 while (low <= high) { 287 int mid; 288 int diff; 289 int diffLen; 290 291 mid = low + ((high - low) / 2); // avoid overflow 292 293 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) { 294 diffLen = pArchive->pEntries[mid].fileNameLen; 295 } else { 296 diffLen = fileNameLen; 297 } 298 diff = strncmp(pArchive->pEntries[mid].fileName, fileName, 299 diffLen); 300 if (diff == 0) { 301 diff = pArchive->pEntries[mid].fileNameLen - fileNameLen; 302 } 303 if (diff < 0) { 304 low = mid + 1; 305 } else if (diff > 0) { 306 high = mid - 1; 307 } else { 308 high = mid; 309 break; 310 } 311 } 312 313 unsigned int target = high + 1; 314 assert(target <= i); 315 if (target != i) { 316 /* It belongs somewhere other than at the end of 317 * the list. Make some room at [target]. 318 */ 319 memmove(pArchive->pEntries + target + 1, 320 pArchive->pEntries + target, 321 (i - target) * sizeof(ZipEntry)); 322 } 323 pEntry = &pArchive->pEntries[target]; 324 } else { 325 pEntry = &pArchive->pEntries[0]; 326 } 327 #else 328 pEntry = &pArchive->pEntries[i]; 329 #endif 330 331 //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n", 332 // i, localHdrOffset, fileNameLen, extraLen, commentLen); 333 334 pEntry->fileNameLen = fileNameLen; 335 pEntry->fileName = fileName; 336 337 pEntry->compLen = get4LE(ptr + CENSIZ); 338 pEntry->uncompLen = get4LE(ptr + CENLEN); 339 pEntry->compression = get2LE(ptr + CENHOW); 340 pEntry->modTime = get4LE(ptr + CENTIM); 341 pEntry->crc32 = get4LE(ptr + CENCRC); 342 343 /* These two are necessary for finding the mode of the file. 344 */ 345 pEntry->versionMadeBy = get2LE(ptr + CENVEM); 346 if ((pEntry->versionMadeBy & 0xff00) != 0 && 347 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX) 348 { 349 LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n", 350 pEntry->versionMadeBy >> 8, i); 351 goto bail; 352 } 353 pEntry->externalFileAttributes = get4LE(ptr + CENATX); 354 355 // Perform pArchive->addr + localHdrOffset, ensuring that it won't 356 // overflow. This is needed because localHdrOffset is untrusted. 357 if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pArchive->addr, 358 (uintptr_t)localHdrOffset)) { 359 LOGW("Integer overflow adding in parseZipArchive\n"); 360 goto bail; 361 } 362 if ((uintptr_t)localHdr + LOCHDR > 363 (uintptr_t)pArchive->addr + pArchive->length) { 364 LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i); 365 goto bail; 366 } 367 if (get4LE(localHdr) != LOCSIG) { 368 LOGW("Missed a local header sig (at %d)\n", i); 369 goto bail; 370 } 371 pEntry->offset = localHdrOffset + LOCHDR 372 + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT); 373 if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) { 374 LOGW("Integer overflow adding in parseZipArchive\n"); 375 goto bail; 376 } 377 if ((size_t)pEntry->offset + pEntry->compLen > pArchive->length) { 378 LOGW("Data ran off the end (at %d)\n", i); 379 goto bail; 380 } 381 382 #if !SORT_ENTRIES 383 /* Add to hash table; no need to lock here. 384 * Can't do this now if we're sorting, because entries 385 * will move around. 386 */ 387 addEntryToHashTable(pArchive->pHash, pEntry); 388 #endif 389 390 //dumpEntry(pEntry); 391 ptr += CENHDR + fileNameLen + extraLen + commentLen; 392 } 393 394 #if SORT_ENTRIES 395 /* If we're sorting, we have to wait until all entries 396 * are in their final places, otherwise the pointers will 397 * probably point to the wrong things. 398 */ 399 for (i = 0; i < numEntries; i++) { 400 /* Add to hash table; no need to lock here. 401 */ 402 addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]); 403 } 404 #endif 405 406 result = true; 407 408 bail: 409 if (!result) { 410 mzHashTableFree(pArchive->pHash); 411 pArchive->pHash = NULL; 412 } 413 return result; 414 } 415 416 /* 417 * Open a Zip archive and scan out the contents. 418 * 419 * The easiest way to do this is to mmap() the whole thing and do the 420 * traditional backward scan for central directory. Since the EOCD is 421 * a relatively small bit at the end, we should end up only touching a 422 * small set of pages. 423 * 424 * This will be called on non-Zip files, especially during startup, so 425 * we don't want to be too noisy about failures. (Do we want a "quiet" 426 * flag?) 427 * 428 * On success, we fill out the contents of "pArchive". 429 */ 430 int mzOpenZipArchive(unsigned char* addr, size_t length, ZipArchive* pArchive) 431 { 432 int err; 433 434 if (length < ENDHDR) { 435 err = -1; 436 LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length); 437 goto bail; 438 } 439 440 pArchive->addr = addr; 441 pArchive->length = length; 442 443 if (!parseZipArchive(pArchive)) { 444 err = -1; 445 LOGV("Parsing '%s' failed\n", fileName); 446 goto bail; 447 } 448 449 err = 0; 450 451 bail: 452 if (err != 0) 453 mzCloseZipArchive(pArchive); 454 return err; 455 } 456 457 /* 458 * Close a ZipArchive, closing the file and freeing the contents. 459 * 460 * NOTE: the ZipArchive may not have been fully created. 461 */ 462 void mzCloseZipArchive(ZipArchive* pArchive) 463 { 464 LOGV("Closing archive %p\n", pArchive); 465 466 free(pArchive->pEntries); 467 468 mzHashTableFree(pArchive->pHash); 469 470 pArchive->pHash = NULL; 471 pArchive->pEntries = NULL; 472 } 473 474 /* 475 * Find a matching entry. 476 * 477 * Returns NULL if no matching entry found. 478 */ 479 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive, 480 const char* entryName) 481 { 482 unsigned int itemHash = computeHash(entryName, strlen(entryName)); 483 484 return (const ZipEntry*)mzHashTableLookup(pArchive->pHash, 485 itemHash, (char*) entryName, hashcmpZipName, false); 486 } 487 488 /* 489 * Return true if the entry is a symbolic link. 490 */ 491 bool mzIsZipEntrySymlink(const ZipEntry* pEntry) 492 { 493 if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) { 494 return S_ISLNK(pEntry->externalFileAttributes >> 16); 495 } 496 return false; 497 } 498 499 /* Call processFunction on the uncompressed data of a STORED entry. 500 */ 501 static bool processStoredEntry(const ZipArchive *pArchive, 502 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 503 void *cookie) 504 { 505 return processFunction(pArchive->addr + pEntry->offset, pEntry->uncompLen, cookie); 506 } 507 508 static bool processDeflatedEntry(const ZipArchive *pArchive, 509 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 510 void *cookie) 511 { 512 long result = -1; 513 unsigned char readBuf[32 * 1024]; 514 unsigned char procBuf[32 * 1024]; 515 z_stream zstream; 516 int zerr; 517 long compRemaining; 518 519 compRemaining = pEntry->compLen; 520 521 /* 522 * Initialize the zlib stream. 523 */ 524 memset(&zstream, 0, sizeof(zstream)); 525 zstream.zalloc = Z_NULL; 526 zstream.zfree = Z_NULL; 527 zstream.opaque = Z_NULL; 528 zstream.next_in = pArchive->addr + pEntry->offset; 529 zstream.avail_in = pEntry->compLen; 530 zstream.next_out = (Bytef*) procBuf; 531 zstream.avail_out = sizeof(procBuf); 532 zstream.data_type = Z_UNKNOWN; 533 534 /* 535 * Use the undocumented "negative window bits" feature to tell zlib 536 * that there's no zlib header waiting for it. 537 */ 538 zerr = inflateInit2(&zstream, -MAX_WBITS); 539 if (zerr != Z_OK) { 540 if (zerr == Z_VERSION_ERROR) { 541 LOGE("Installed zlib is not compatible with linked version (%s)\n", 542 ZLIB_VERSION); 543 } else { 544 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 545 } 546 goto bail; 547 } 548 549 /* 550 * Loop while we have data. 551 */ 552 do { 553 /* uncompress the data */ 554 zerr = inflate(&zstream, Z_NO_FLUSH); 555 if (zerr != Z_OK && zerr != Z_STREAM_END) { 556 LOGD("zlib inflate call failed (zerr=%d)\n", zerr); 557 goto z_bail; 558 } 559 560 /* write when we're full or when we're done */ 561 if (zstream.avail_out == 0 || 562 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf))) 563 { 564 long procSize = zstream.next_out - procBuf; 565 LOGVV("+++ processing %d bytes\n", (int) procSize); 566 bool ret = processFunction(procBuf, procSize, cookie); 567 if (!ret) { 568 LOGW("Process function elected to fail (in inflate)\n"); 569 goto z_bail; 570 } 571 572 zstream.next_out = procBuf; 573 zstream.avail_out = sizeof(procBuf); 574 } 575 } while (zerr == Z_OK); 576 577 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 578 579 // success! 580 result = zstream.total_out; 581 582 z_bail: 583 inflateEnd(&zstream); /* free up any allocated structures */ 584 585 bail: 586 if (result != pEntry->uncompLen) { 587 if (result != -1) // error already shown? 588 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 589 result, pEntry->uncompLen); 590 return false; 591 } 592 return true; 593 } 594 595 /* 596 * Stream the uncompressed data through the supplied function, 597 * passing cookie to it each time it gets called. processFunction 598 * may be called more than once. 599 * 600 * If processFunction returns false, the operation is abandoned and 601 * mzProcessZipEntryContents() immediately returns false. 602 * 603 * This is useful for calculating the hash of an entry's uncompressed contents. 604 */ 605 bool mzProcessZipEntryContents(const ZipArchive *pArchive, 606 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 607 void *cookie) 608 { 609 bool ret = false; 610 off_t oldOff; 611 612 switch (pEntry->compression) { 613 case STORED: 614 ret = processStoredEntry(pArchive, pEntry, processFunction, cookie); 615 break; 616 case DEFLATED: 617 ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie); 618 break; 619 default: 620 LOGE("Unsupported compression type %d for entry '%s'\n", 621 pEntry->compression, pEntry->fileName); 622 break; 623 } 624 625 return ret; 626 } 627 628 static bool crcProcessFunction(const unsigned char *data, int dataLen, 629 void *crc) 630 { 631 *(unsigned long *)crc = crc32(*(unsigned long *)crc, data, dataLen); 632 return true; 633 } 634 635 /* 636 * Check the CRC on this entry; return true if it is correct. 637 * May do other internal checks as well. 638 */ 639 bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry) 640 { 641 unsigned long crc; 642 bool ret; 643 644 crc = crc32(0L, Z_NULL, 0); 645 ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction, 646 (void *)&crc); 647 if (!ret) { 648 LOGE("Can't calculate CRC for entry\n"); 649 return false; 650 } 651 if (crc != (unsigned long)pEntry->crc32) { 652 LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n", 653 pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32); 654 return false; 655 } 656 return true; 657 } 658 659 typedef struct { 660 char *buf; 661 int bufLen; 662 } CopyProcessArgs; 663 664 static bool copyProcessFunction(const unsigned char *data, int dataLen, 665 void *cookie) 666 { 667 CopyProcessArgs *args = (CopyProcessArgs *)cookie; 668 if (dataLen <= args->bufLen) { 669 memcpy(args->buf, data, dataLen); 670 args->buf += dataLen; 671 args->bufLen -= dataLen; 672 return true; 673 } 674 return false; 675 } 676 677 /* 678 * Read an entry into a buffer allocated by the caller. 679 */ 680 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry, 681 char *buf, int bufLen) 682 { 683 CopyProcessArgs args; 684 bool ret; 685 686 args.buf = buf; 687 args.bufLen = bufLen; 688 ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction, 689 (void *)&args); 690 if (!ret) { 691 LOGE("Can't extract entry to buffer.\n"); 692 return false; 693 } 694 return true; 695 } 696 697 static bool writeProcessFunction(const unsigned char *data, int dataLen, 698 void *cookie) 699 { 700 int fd = (int)(intptr_t)cookie; 701 if (dataLen == 0) { 702 return true; 703 } 704 ssize_t soFar = 0; 705 while (true) { 706 ssize_t n = write(fd, data+soFar, dataLen-soFar); 707 if (n <= 0) { 708 LOGE("Error writing %zd bytes from zip file from %p: %s\n", 709 dataLen-soFar, data+soFar, strerror(errno)); 710 if (errno != EINTR) { 711 return false; 712 } 713 } else if (n > 0) { 714 soFar += n; 715 if (soFar == dataLen) return true; 716 if (soFar > dataLen) { 717 LOGE("write overrun? (%zd bytes instead of %d)\n", 718 soFar, dataLen); 719 return false; 720 } 721 } 722 } 723 } 724 725 /* 726 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset. 727 */ 728 bool mzExtractZipEntryToFile(const ZipArchive *pArchive, 729 const ZipEntry *pEntry, int fd) 730 { 731 bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction, 732 (void*)(intptr_t)fd); 733 if (!ret) { 734 LOGE("Can't extract entry to file.\n"); 735 return false; 736 } 737 return true; 738 } 739 740 /* 741 * Obtain a pointer to the in-memory representation of a stored entry. 742 */ 743 bool mzGetStoredEntry(const ZipArchive *pArchive, 744 const ZipEntry *pEntry, unsigned char **addr, size_t *length) 745 { 746 if (pEntry->compression != STORED) { 747 LOGE("Can't getStoredEntry for '%s'; not stored\n", 748 pEntry->fileName); 749 return false; 750 } 751 752 *addr = pArchive->addr + pEntry->offset; 753 *length = pEntry->uncompLen; 754 return true; 755 } 756 757 typedef struct { 758 unsigned char* buffer; 759 long len; 760 } BufferExtractCookie; 761 762 static bool bufferProcessFunction(const unsigned char *data, int dataLen, 763 void *cookie) { 764 BufferExtractCookie *bec = (BufferExtractCookie*)cookie; 765 766 memmove(bec->buffer, data, dataLen); 767 bec->buffer += dataLen; 768 bec->len -= dataLen; 769 770 return true; 771 } 772 773 /* 774 * Uncompress "pEntry" in "pArchive" to buffer, which must be large 775 * enough to hold mzGetZipEntryUncomplen(pEntry) bytes. 776 */ 777 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive, 778 const ZipEntry *pEntry, unsigned char *buffer) 779 { 780 BufferExtractCookie bec; 781 bec.buffer = buffer; 782 bec.len = mzGetZipEntryUncompLen(pEntry); 783 784 bool ret = mzProcessZipEntryContents(pArchive, pEntry, 785 bufferProcessFunction, (void*)&bec); 786 if (!ret || bec.len != 0) { 787 LOGE("Can't extract entry to memory buffer.\n"); 788 return false; 789 } 790 return true; 791 } 792 793 794 /* Helper state to make path translation easier and less malloc-happy. 795 */ 796 typedef struct { 797 const char *targetDir; 798 const char *zipDir; 799 char *buf; 800 int targetDirLen; 801 int zipDirLen; 802 int bufLen; 803 } MzPathHelper; 804 805 /* Given the values of targetDir and zipDir in the helper, 806 * return the target filename of the provided entry. 807 * The helper must be initialized first. 808 */ 809 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry) 810 { 811 int needLen; 812 bool firstTime = (helper->buf == NULL); 813 814 /* target file <-- targetDir + / + entry[zipDirLen:] 815 */ 816 needLen = helper->targetDirLen + 1 + 817 pEntry->fileNameLen - helper->zipDirLen + 1; 818 if (needLen > helper->bufLen) { 819 char *newBuf; 820 821 needLen *= 2; 822 newBuf = (char *)realloc(helper->buf, needLen); 823 if (newBuf == NULL) { 824 return NULL; 825 } 826 helper->buf = newBuf; 827 helper->bufLen = needLen; 828 } 829 830 /* Every path will start with the target path and a slash. 831 */ 832 if (firstTime) { 833 char *p = helper->buf; 834 memcpy(p, helper->targetDir, helper->targetDirLen); 835 p += helper->targetDirLen; 836 if (p == helper->buf || p[-1] != '/') { 837 helper->targetDirLen += 1; 838 *p++ = '/'; 839 } 840 } 841 842 /* Replace the custom part of the path with the appropriate 843 * part of the entry's path. 844 */ 845 char *epath = helper->buf + helper->targetDirLen; 846 memcpy(epath, pEntry->fileName + helper->zipDirLen, 847 pEntry->fileNameLen - helper->zipDirLen); 848 epath += pEntry->fileNameLen - helper->zipDirLen; 849 *epath = '\0'; 850 851 return helper->buf; 852 } 853 854 /* 855 * Inflate all entries under zipDir to the directory specified by 856 * targetDir, which must exist and be a writable directory. 857 * 858 * The immediate children of zipDir will become the immediate 859 * children of targetDir; e.g., if the archive contains the entries 860 * 861 * a/b/c/one 862 * a/b/c/two 863 * a/b/c/d/three 864 * 865 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting 866 * files will be 867 * 868 * /tmp/one 869 * /tmp/two 870 * /tmp/d/three 871 * 872 * Returns true on success, false on failure. 873 */ 874 bool mzExtractRecursive(const ZipArchive *pArchive, 875 const char *zipDir, const char *targetDir, 876 int flags, const struct utimbuf *timestamp, 877 void (*callback)(const char *fn, void *), void *cookie, 878 struct selabel_handle *sehnd) 879 { 880 if (zipDir[0] == '/') { 881 LOGE("mzExtractRecursive(): zipDir must be a relative path.\n"); 882 return false; 883 } 884 if (targetDir[0] != '/') { 885 LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n"); 886 return false; 887 } 888 889 unsigned int zipDirLen; 890 char *zpath; 891 892 zipDirLen = strlen(zipDir); 893 zpath = (char *)malloc(zipDirLen + 2); 894 if (zpath == NULL) { 895 LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2); 896 return false; 897 } 898 /* If zipDir is empty, we'll extract the entire zip file. 899 * Otherwise, canonicalize the path. 900 */ 901 if (zipDirLen > 0) { 902 /* Make sure there's (hopefully, exactly one) slash at the 903 * end of the path. This way we don't need to worry about 904 * accidentally extracting "one/twothree" when a path like 905 * "one/two" is specified. 906 */ 907 memcpy(zpath, zipDir, zipDirLen); 908 if (zpath[zipDirLen-1] != '/') { 909 zpath[zipDirLen++] = '/'; 910 } 911 } 912 zpath[zipDirLen] = '\0'; 913 914 /* Set up the helper structure that we'll use to assemble paths. 915 */ 916 MzPathHelper helper; 917 helper.targetDir = targetDir; 918 helper.targetDirLen = strlen(helper.targetDir); 919 helper.zipDir = zpath; 920 helper.zipDirLen = strlen(helper.zipDir); 921 helper.buf = NULL; 922 helper.bufLen = 0; 923 924 /* Walk through the entries and extract anything whose path begins 925 * with zpath. 926 //TODO: since the entries are sorted, binary search for the first match 927 // and stop after the first non-match. 928 */ 929 unsigned int i; 930 bool seenMatch = false; 931 int ok = true; 932 int extractCount = 0; 933 for (i = 0; i < pArchive->numEntries; i++) { 934 ZipEntry *pEntry = pArchive->pEntries + i; 935 if (pEntry->fileNameLen < zipDirLen) { 936 //TODO: look out for a single empty directory entry that matches zpath, but 937 // missing the trailing slash. Most zip files seem to include 938 // the trailing slash, but I think it's legal to leave it off. 939 // e.g., zpath "a/b/", entry "a/b", with no children of the entry. 940 /* No chance of matching. 941 */ 942 #if SORT_ENTRIES 943 if (seenMatch) { 944 /* Since the entries are sorted, we can give up 945 * on the first mismatch after the first match. 946 */ 947 break; 948 } 949 #endif 950 continue; 951 } 952 /* If zpath is empty, this strncmp() will match everything, 953 * which is what we want. 954 */ 955 if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) { 956 #if SORT_ENTRIES 957 if (seenMatch) { 958 /* Since the entries are sorted, we can give up 959 * on the first mismatch after the first match. 960 */ 961 break; 962 } 963 #endif 964 continue; 965 } 966 /* This entry begins with zipDir, so we'll extract it. 967 */ 968 seenMatch = true; 969 970 /* Find the target location of the entry. 971 */ 972 const char *targetFile = targetEntryPath(&helper, pEntry); 973 if (targetFile == NULL) { 974 LOGE("Can't assemble target path for \"%.*s\"\n", 975 pEntry->fileNameLen, pEntry->fileName); 976 ok = false; 977 break; 978 } 979 980 /* With DRY_RUN set, invoke the callback but don't do anything else. 981 */ 982 if (flags & MZ_EXTRACT_DRY_RUN) { 983 if (callback != NULL) callback(targetFile, cookie); 984 continue; 985 } 986 987 /* Create the file or directory. 988 */ 989 #define UNZIP_DIRMODE 0755 990 #define UNZIP_FILEMODE 0644 991 if (pEntry->fileName[pEntry->fileNameLen-1] == '/') { 992 if (!(flags & MZ_EXTRACT_FILES_ONLY)) { 993 int ret = dirCreateHierarchy( 994 targetFile, UNZIP_DIRMODE, timestamp, false, sehnd); 995 if (ret != 0) { 996 LOGE("Can't create containing directory for \"%s\": %s\n", 997 targetFile, strerror(errno)); 998 ok = false; 999 break; 1000 } 1001 LOGD("Extracted dir \"%s\"\n", targetFile); 1002 } 1003 } else { 1004 /* This is not a directory. First, make sure that 1005 * the containing directory exists. 1006 */ 1007 int ret = dirCreateHierarchy( 1008 targetFile, UNZIP_DIRMODE, timestamp, true, sehnd); 1009 if (ret != 0) { 1010 LOGE("Can't create containing directory for \"%s\": %s\n", 1011 targetFile, strerror(errno)); 1012 ok = false; 1013 break; 1014 } 1015 1016 /* With FILES_ONLY set, we need to ignore metadata entirely, 1017 * so treat symlinks as regular files. 1018 */ 1019 if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) { 1020 /* The entry is a symbolic link. 1021 * The relative target of the symlink is in the 1022 * data section of this entry. 1023 */ 1024 if (pEntry->uncompLen == 0) { 1025 LOGE("Symlink entry \"%s\" has no target\n", 1026 targetFile); 1027 ok = false; 1028 break; 1029 } 1030 char *linkTarget = malloc(pEntry->uncompLen + 1); 1031 if (linkTarget == NULL) { 1032 ok = false; 1033 break; 1034 } 1035 ok = mzReadZipEntry(pArchive, pEntry, linkTarget, 1036 pEntry->uncompLen); 1037 if (!ok) { 1038 LOGE("Can't read symlink target for \"%s\"\n", 1039 targetFile); 1040 free(linkTarget); 1041 break; 1042 } 1043 linkTarget[pEntry->uncompLen] = '\0'; 1044 1045 /* Make the link. 1046 */ 1047 ret = symlink(linkTarget, targetFile); 1048 if (ret != 0) { 1049 LOGE("Can't symlink \"%s\" to \"%s\": %s\n", 1050 targetFile, linkTarget, strerror(errno)); 1051 free(linkTarget); 1052 ok = false; 1053 break; 1054 } 1055 LOGD("Extracted symlink \"%s\" -> \"%s\"\n", 1056 targetFile, linkTarget); 1057 free(linkTarget); 1058 } else { 1059 /* The entry is a regular file. 1060 * Open the target for writing. 1061 */ 1062 1063 char *secontext = NULL; 1064 1065 if (sehnd) { 1066 selabel_lookup(sehnd, &secontext, targetFile, UNZIP_FILEMODE); 1067 setfscreatecon(secontext); 1068 } 1069 1070 int fd = open(targetFile, O_CREAT|O_WRONLY|O_TRUNC|O_SYNC 1071 , UNZIP_FILEMODE); 1072 1073 if (secontext) { 1074 freecon(secontext); 1075 setfscreatecon(NULL); 1076 } 1077 1078 if (fd < 0) { 1079 LOGE("Can't create target file \"%s\": %s\n", 1080 targetFile, strerror(errno)); 1081 ok = false; 1082 break; 1083 } 1084 1085 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd); 1086 if (ok) { 1087 ok = (fsync(fd) == 0); 1088 } 1089 if (close(fd) != 0) { 1090 ok = false; 1091 } 1092 if (!ok) { 1093 LOGE("Error extracting \"%s\"\n", targetFile); 1094 ok = false; 1095 break; 1096 } 1097 1098 if (timestamp != NULL && utime(targetFile, timestamp)) { 1099 LOGE("Error touching \"%s\"\n", targetFile); 1100 ok = false; 1101 break; 1102 } 1103 1104 LOGV("Extracted file \"%s\"\n", targetFile); 1105 ++extractCount; 1106 } 1107 } 1108 1109 if (callback != NULL) callback(targetFile, cookie); 1110 } 1111 1112 LOGD("Extracted %d file(s)\n", extractCount); 1113 1114 free(helper.buf); 1115 free(zpath); 1116 1117 return ok; 1118 } 1119