/*
 * Copyright 2006 The Android Open Source Project
 *
 * Simple Zip file support.
 */
#include "safe_iop.h"
#include "zlib.h"

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>     // for uintptr_t
#include <stdlib.h>
#include <sys/stat.h>   // for S_ISLNK()
#include <unistd.h>

#define LOG_TAG "minzip"
#include "Zip.h"
#include "Bits.h"
#include "Log.h"
#include "DirUtil.h"

#undef NDEBUG   // do this after including Log.h
#include <assert.h>

/* When non-zero, the entry table is kept sorted by name while parsing. */
#define SORT_ENTRIES 1

/*
 * Offset and length constants (java.util.zip naming convention).
 *
 * Each group holds a record signature, the size of the record's fixed
 * header, and the byte offsets of individual fields inside that header.
 */
enum {
    /* Central directory entry (one per archived file). */
    CENSIG = 0x02014b50,      // PK12
    CENHDR = 46,

    CENVEM = 4,
    CENVER = 6,
    CENFLG = 8,
    CENHOW = 10,
    CENTIM = 12,
    CENCRC = 16,
    CENSIZ = 20,
    CENLEN = 24,
    CENNAM = 28,
    CENEXT = 30,
    CENCOM = 32,
    CENDSK = 34,
    CENATT = 36,
    CENATX = 38,
    CENOFF = 42,

    /* End-of-central-directory (EOCD) record. */
    ENDSIG = 0x06054b50,      // PK56
    ENDHDR = 22,

    ENDSUB = 8,
    ENDTOT = 10,
    ENDSIZ = 12,
    ENDOFF = 16,
    ENDCOM = 20,

    /* NOTE(review): not referenced in this file; presumably the optional
     * data-descriptor record -- confirm against the Zip specification. */
    EXTSIG = 0x08074b50,      // PK78
    EXTHDR = 16,

    EXTCRC = 4,
    EXTSIZ = 8,
    EXTLEN = 12,

    /* Local file header (precedes each entry's data). */
    LOCSIG = 0x04034b50,      // PK34
    LOCHDR = 30,

    LOCVER = 4,
    LOCFLG = 6,
    LOCHOW = 8,
    LOCTIM = 10,
    LOCCRC = 14,
    LOCSIZ = 18,
    LOCLEN = 22,
    LOCNAM = 26,
    LOCEXT = 28,

    /* Compression methods handled by mzProcessZipEntryContents(). */
    STORED = 0,
    DEFLATED = 8,

    CENVEM_UNIX = 3 << 8,   // the high byte of CENVEM
};


/*
 * For debugging, dump the contents of a ZipEntry.
 *
 * Compiled out; flip the "#if 0" to re-enable it.
 */
#if 0
static void dumpEntry(const ZipEntry* pEntry)
{
    LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName);
    LOGI(" off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset,
        pEntry->compLen, pEntry->uncompLen, pEntry->compression);
}
#endif

/*
 * (This is a mzHashTableLookup callback.)
 *
 * Compare two ZipEntry structs, by name.
103 */ 104 static int hashcmpZipEntry(const void* ventry1, const void* ventry2) 105 { 106 const ZipEntry* entry1 = (const ZipEntry*) ventry1; 107 const ZipEntry* entry2 = (const ZipEntry*) ventry2; 108 109 if (entry1->fileNameLen != entry2->fileNameLen) 110 return entry1->fileNameLen - entry2->fileNameLen; 111 return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen); 112 } 113 114 /* 115 * (This is a mzHashTableLookup callback.) 116 * 117 * find a ZipEntry struct by name. 118 */ 119 static int hashcmpZipName(const void* ventry, const void* vname) 120 { 121 const ZipEntry* entry = (const ZipEntry*) ventry; 122 const char* name = (const char*) vname; 123 unsigned int nameLen = strlen(name); 124 125 if (entry->fileNameLen != nameLen) 126 return entry->fileNameLen - nameLen; 127 return memcmp(entry->fileName, name, nameLen); 128 } 129 130 /* 131 * Compute the hash code for a ZipEntry filename. 132 * 133 * Not expected to be compatible with any other hash function, so we init 134 * to 2 to ensure it doesn't happen to match. 135 */ 136 static unsigned int computeHash(const char* name, int nameLen) 137 { 138 unsigned int hash = 2; 139 140 while (nameLen--) 141 hash = hash * 31 + *name++; 142 143 return hash; 144 } 145 146 static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry) 147 { 148 unsigned int itemHash = computeHash(pEntry->fileName, pEntry->fileNameLen); 149 const ZipEntry* found; 150 151 found = (const ZipEntry*)mzHashTableLookup(pHash, 152 itemHash, pEntry, hashcmpZipEntry, true); 153 if (found != pEntry) { 154 LOGW("WARNING: duplicate entry '%.*s' in Zip\n", 155 found->fileNameLen, found->fileName); 156 /* keep going */ 157 } 158 } 159 160 static int validFilename(const char *fileName, unsigned int fileNameLen) 161 { 162 // Forbid super long filenames. 
163 if (fileNameLen >= PATH_MAX) { 164 LOGW("Filename too long (%d chatacters)\n", fileNameLen); 165 return 0; 166 } 167 168 // Require all characters to be printable ASCII (no NUL, no UTF-8, etc). 169 unsigned int i; 170 for (i = 0; i < fileNameLen; ++i) { 171 if (fileName[i] < 32 || fileName[i] >= 127) { 172 LOGW("Filename contains invalid character '\%03o'\n", fileName[i]); 173 return 0; 174 } 175 } 176 177 return 1; 178 } 179 180 /* 181 * Parse the contents of a Zip archive. After confirming that the file 182 * is in fact a Zip, we scan out the contents of the central directory and 183 * store it in a hash table. 184 * 185 * Returns "true" on success. 186 */ 187 static bool parseZipArchive(ZipArchive* pArchive) 188 { 189 bool result = false; 190 const unsigned char* ptr; 191 unsigned int i, numEntries, cdOffset; 192 unsigned int val; 193 194 /* 195 * The first 4 bytes of the file will either be the local header 196 * signature for the first file (LOCSIG) or, if the archive doesn't 197 * have any files in it, the end-of-central-directory signature (ENDSIG). 198 */ 199 val = get4LE(pArchive->addr); 200 if (val == ENDSIG) { 201 LOGW("Found Zip archive, but it looks empty\n"); 202 goto bail; 203 } else if (val != LOCSIG) { 204 LOGW("Not a Zip archive (found 0x%08x)\n", val); 205 goto bail; 206 } 207 208 /* 209 * Find the EOCD. We'll find it immediately unless they have a file 210 * comment. 211 */ 212 ptr = pArchive->addr + pArchive->length - ENDHDR; 213 214 while (ptr >= (const unsigned char*) pArchive->addr) { 215 if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG) 216 break; 217 ptr--; 218 } 219 if (ptr < (const unsigned char*) pArchive->addr) { 220 LOGW("Could not find end-of-central-directory in Zip\n"); 221 goto bail; 222 } 223 224 /* 225 * There are two interesting items in the EOCD block: the number of 226 * entries in the file, and the file offset of the start of the 227 * central directory. 
228 */ 229 numEntries = get2LE(ptr + ENDSUB); 230 cdOffset = get4LE(ptr + ENDOFF); 231 232 LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 233 if (numEntries == 0 || cdOffset >= pArchive->length) { 234 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 235 numEntries, cdOffset, pArchive->length); 236 goto bail; 237 } 238 239 /* 240 * Create data structures to hold entries. 241 */ 242 pArchive->numEntries = numEntries; 243 pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry)); 244 pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL); 245 if (pArchive->pEntries == NULL || pArchive->pHash == NULL) 246 goto bail; 247 248 ptr = pArchive->addr + cdOffset; 249 for (i = 0; i < numEntries; i++) { 250 ZipEntry* pEntry; 251 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 252 const unsigned char* localHdr; 253 const char *fileName; 254 255 if (ptr + CENHDR > (const unsigned char*)pArchive->addr + pArchive->length) { 256 LOGW("Ran off the end (at %d)\n", i); 257 goto bail; 258 } 259 if (get4LE(ptr) != CENSIG) { 260 LOGW("Missed a central dir sig (at %d)\n", i); 261 goto bail; 262 } 263 264 localHdrOffset = get4LE(ptr + CENOFF); 265 fileNameLen = get2LE(ptr + CENNAM); 266 extraLen = get2LE(ptr + CENEXT); 267 commentLen = get2LE(ptr + CENCOM); 268 fileName = (const char*)ptr + CENHDR; 269 if (fileName + fileNameLen > (const char*)pArchive->addr + pArchive->length) { 270 LOGW("Filename ran off the end (at %d)\n", i); 271 goto bail; 272 } 273 if (!validFilename(fileName, fileNameLen)) { 274 LOGW("Invalid filename (at %d)\n", i); 275 goto bail; 276 } 277 278 #if SORT_ENTRIES 279 /* Figure out where this entry should go (binary search). 
280 */ 281 if (i > 0) { 282 int low, high; 283 284 low = 0; 285 high = i - 1; 286 while (low <= high) { 287 int mid; 288 int diff; 289 int diffLen; 290 291 mid = low + ((high - low) / 2); // avoid overflow 292 293 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) { 294 diffLen = pArchive->pEntries[mid].fileNameLen; 295 } else { 296 diffLen = fileNameLen; 297 } 298 diff = strncmp(pArchive->pEntries[mid].fileName, fileName, 299 diffLen); 300 if (diff == 0) { 301 diff = pArchive->pEntries[mid].fileNameLen - fileNameLen; 302 } 303 if (diff < 0) { 304 low = mid + 1; 305 } else if (diff > 0) { 306 high = mid - 1; 307 } else { 308 high = mid; 309 break; 310 } 311 } 312 313 unsigned int target = high + 1; 314 assert(target <= i); 315 if (target != i) { 316 /* It belongs somewhere other than at the end of 317 * the list. Make some room at [target]. 318 */ 319 memmove(pArchive->pEntries + target + 1, 320 pArchive->pEntries + target, 321 (i - target) * sizeof(ZipEntry)); 322 } 323 pEntry = &pArchive->pEntries[target]; 324 } else { 325 pEntry = &pArchive->pEntries[0]; 326 } 327 #else 328 pEntry = &pArchive->pEntries[i]; 329 #endif 330 pEntry->fileNameLen = fileNameLen; 331 pEntry->fileName = fileName; 332 333 pEntry->compLen = get4LE(ptr + CENSIZ); 334 pEntry->uncompLen = get4LE(ptr + CENLEN); 335 pEntry->compression = get2LE(ptr + CENHOW); 336 pEntry->modTime = get4LE(ptr + CENTIM); 337 pEntry->crc32 = get4LE(ptr + CENCRC); 338 339 /* These two are necessary for finding the mode of the file. 340 */ 341 pEntry->versionMadeBy = get2LE(ptr + CENVEM); 342 if ((pEntry->versionMadeBy & 0xff00) != 0 && 343 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX) 344 { 345 LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n", 346 pEntry->versionMadeBy >> 8, i); 347 goto bail; 348 } 349 pEntry->externalFileAttributes = get4LE(ptr + CENATX); 350 351 // Perform pArchive->addr + localHdrOffset, ensuring that it won't 352 // overflow. This is needed because localHdrOffset is untrusted. 
353 if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pArchive->addr, 354 (uintptr_t)localHdrOffset)) { 355 LOGW("Integer overflow adding in parseZipArchive\n"); 356 goto bail; 357 } 358 if ((uintptr_t)localHdr + LOCHDR > 359 (uintptr_t)pArchive->addr + pArchive->length) { 360 LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i); 361 goto bail; 362 } 363 if (get4LE(localHdr) != LOCSIG) { 364 LOGW("Missed a local header sig (at %d)\n", i); 365 goto bail; 366 } 367 pEntry->offset = localHdrOffset + LOCHDR 368 + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT); 369 if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) { 370 LOGW("Integer overflow adding in parseZipArchive\n"); 371 goto bail; 372 } 373 if ((size_t)pEntry->offset + pEntry->compLen > pArchive->length) { 374 LOGW("Data ran off the end (at %d)\n", i); 375 goto bail; 376 } 377 378 #if !SORT_ENTRIES 379 /* Add to hash table; no need to lock here. 380 * Can't do this now if we're sorting, because entries 381 * will move around. 382 */ 383 addEntryToHashTable(pArchive->pHash, pEntry); 384 #endif 385 386 //dumpEntry(pEntry); 387 ptr += CENHDR + fileNameLen + extraLen + commentLen; 388 } 389 390 #if SORT_ENTRIES 391 /* If we're sorting, we have to wait until all entries 392 * are in their final places, otherwise the pointers will 393 * probably point to the wrong things. 394 */ 395 for (i = 0; i < numEntries; i++) { 396 /* Add to hash table; no need to lock here. 397 */ 398 addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]); 399 } 400 #endif 401 402 result = true; 403 404 bail: 405 if (!result) { 406 mzHashTableFree(pArchive->pHash); 407 pArchive->pHash = NULL; 408 } 409 return result; 410 } 411 412 /* 413 * Open a Zip archive and scan out the contents. 414 * 415 * The easiest way to do this is to mmap() the whole thing and do the 416 * traditional backward scan for central directory. 
Since the EOCD is 417 * a relatively small bit at the end, we should end up only touching a 418 * small set of pages. 419 * 420 * This will be called on non-Zip files, especially during startup, so 421 * we don't want to be too noisy about failures. (Do we want a "quiet" 422 * flag?) 423 * 424 * On success, we fill out the contents of "pArchive". 425 */ 426 int mzOpenZipArchive(unsigned char* addr, size_t length, ZipArchive* pArchive) 427 { 428 int err; 429 430 if (length < ENDHDR) { 431 err = -1; 432 LOGW("Archive %p is too small to be zip (%zd)\n", pArchive, length); 433 goto bail; 434 } 435 436 pArchive->addr = addr; 437 pArchive->length = length; 438 439 if (!parseZipArchive(pArchive)) { 440 err = -1; 441 LOGW("Parsing archive %p failed\n", pArchive); 442 goto bail; 443 } 444 445 err = 0; 446 447 bail: 448 if (err != 0) 449 mzCloseZipArchive(pArchive); 450 return err; 451 } 452 453 /* 454 * Close a ZipArchive, closing the file and freeing the contents. 455 * 456 * NOTE: the ZipArchive may not have been fully created. 457 */ 458 void mzCloseZipArchive(ZipArchive* pArchive) 459 { 460 LOGV("Closing archive %p\n", pArchive); 461 462 free(pArchive->pEntries); 463 464 mzHashTableFree(pArchive->pHash); 465 466 pArchive->pHash = NULL; 467 pArchive->pEntries = NULL; 468 } 469 470 /* 471 * Find a matching entry. 472 * 473 * Returns NULL if no matching entry found. 474 */ 475 const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive, 476 const char* entryName) 477 { 478 unsigned int itemHash = computeHash(entryName, strlen(entryName)); 479 480 return (const ZipEntry*)mzHashTableLookup(pArchive->pHash, 481 itemHash, (char*) entryName, hashcmpZipName, false); 482 } 483 484 /* 485 * Return true if the entry is a symbolic link. 
486 */ 487 static bool mzIsZipEntrySymlink(const ZipEntry* pEntry) 488 { 489 if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) { 490 return S_ISLNK(pEntry->externalFileAttributes >> 16); 491 } 492 return false; 493 } 494 495 /* Call processFunction on the uncompressed data of a STORED entry. 496 */ 497 static bool processStoredEntry(const ZipArchive *pArchive, 498 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 499 void *cookie) 500 { 501 return processFunction(pArchive->addr + pEntry->offset, pEntry->uncompLen, cookie); 502 } 503 504 static bool processDeflatedEntry(const ZipArchive *pArchive, 505 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 506 void *cookie) 507 { 508 long result = -1; 509 unsigned char procBuf[32 * 1024]; 510 z_stream zstream; 511 int zerr; 512 long compRemaining; 513 514 compRemaining = pEntry->compLen; 515 516 /* 517 * Initialize the zlib stream. 518 */ 519 memset(&zstream, 0, sizeof(zstream)); 520 zstream.zalloc = Z_NULL; 521 zstream.zfree = Z_NULL; 522 zstream.opaque = Z_NULL; 523 zstream.next_in = pArchive->addr + pEntry->offset; 524 zstream.avail_in = pEntry->compLen; 525 zstream.next_out = (Bytef*) procBuf; 526 zstream.avail_out = sizeof(procBuf); 527 zstream.data_type = Z_UNKNOWN; 528 529 /* 530 * Use the undocumented "negative window bits" feature to tell zlib 531 * that there's no zlib header waiting for it. 532 */ 533 zerr = inflateInit2(&zstream, -MAX_WBITS); 534 if (zerr != Z_OK) { 535 if (zerr == Z_VERSION_ERROR) { 536 LOGE("Installed zlib is not compatible with linked version (%s)\n", 537 ZLIB_VERSION); 538 } else { 539 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 540 } 541 goto bail; 542 } 543 544 /* 545 * Loop while we have data. 
546 */ 547 do { 548 /* uncompress the data */ 549 zerr = inflate(&zstream, Z_NO_FLUSH); 550 if (zerr != Z_OK && zerr != Z_STREAM_END) { 551 LOGW("zlib inflate call failed (zerr=%d)\n", zerr); 552 goto z_bail; 553 } 554 555 /* write when we're full or when we're done */ 556 if (zstream.avail_out == 0 || 557 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf))) 558 { 559 long procSize = zstream.next_out - procBuf; 560 LOGVV("+++ processing %d bytes\n", (int) procSize); 561 bool ret = processFunction(procBuf, procSize, cookie); 562 if (!ret) { 563 LOGW("Process function elected to fail (in inflate)\n"); 564 goto z_bail; 565 } 566 567 zstream.next_out = procBuf; 568 zstream.avail_out = sizeof(procBuf); 569 } 570 } while (zerr == Z_OK); 571 572 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 573 574 // success! 575 result = zstream.total_out; 576 577 z_bail: 578 inflateEnd(&zstream); /* free up any allocated structures */ 579 580 bail: 581 if (result != pEntry->uncompLen) { 582 if (result != -1) // error already shown? 583 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 584 result, pEntry->uncompLen); 585 return false; 586 } 587 return true; 588 } 589 590 /* 591 * Stream the uncompressed data through the supplied function, 592 * passing cookie to it each time it gets called. processFunction 593 * may be called more than once. 594 * 595 * If processFunction returns false, the operation is abandoned and 596 * mzProcessZipEntryContents() immediately returns false. 597 * 598 * This is useful for calculating the hash of an entry's uncompressed contents. 
599 */ 600 bool mzProcessZipEntryContents(const ZipArchive *pArchive, 601 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 602 void *cookie) 603 { 604 bool ret = false; 605 606 switch (pEntry->compression) { 607 case STORED: 608 ret = processStoredEntry(pArchive, pEntry, processFunction, cookie); 609 break; 610 case DEFLATED: 611 ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie); 612 break; 613 default: 614 LOGE("Unsupported compression type %d for entry '%s'\n", 615 pEntry->compression, pEntry->fileName); 616 break; 617 } 618 619 return ret; 620 } 621 622 typedef struct { 623 char *buf; 624 int bufLen; 625 } CopyProcessArgs; 626 627 static bool copyProcessFunction(const unsigned char *data, int dataLen, 628 void *cookie) 629 { 630 CopyProcessArgs *args = (CopyProcessArgs *)cookie; 631 if (dataLen <= args->bufLen) { 632 memcpy(args->buf, data, dataLen); 633 args->buf += dataLen; 634 args->bufLen -= dataLen; 635 return true; 636 } 637 return false; 638 } 639 640 /* 641 * Read an entry into a buffer allocated by the caller. 
642 */ 643 bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry, 644 char *buf, int bufLen) 645 { 646 CopyProcessArgs args; 647 bool ret; 648 649 args.buf = buf; 650 args.bufLen = bufLen; 651 ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction, 652 (void *)&args); 653 if (!ret) { 654 LOGE("Can't extract entry to buffer.\n"); 655 return false; 656 } 657 return true; 658 } 659 660 static bool writeProcessFunction(const unsigned char *data, int dataLen, 661 void *cookie) 662 { 663 int fd = (int)(intptr_t)cookie; 664 if (dataLen == 0) { 665 return true; 666 } 667 ssize_t soFar = 0; 668 while (true) { 669 ssize_t n = TEMP_FAILURE_RETRY(write(fd, data+soFar, dataLen-soFar)); 670 if (n <= 0) { 671 LOGE("Error writing %zd bytes from zip file from %p: %s\n", 672 dataLen-soFar, data+soFar, strerror(errno)); 673 return false; 674 } else if (n > 0) { 675 soFar += n; 676 if (soFar == dataLen) return true; 677 if (soFar > dataLen) { 678 LOGE("write overrun? (%zd bytes instead of %d)\n", 679 soFar, dataLen); 680 return false; 681 } 682 } 683 } 684 } 685 686 /* 687 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset. 
688 */ 689 bool mzExtractZipEntryToFile(const ZipArchive *pArchive, 690 const ZipEntry *pEntry, int fd) 691 { 692 bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction, 693 (void*)(intptr_t)fd); 694 if (!ret) { 695 LOGE("Can't extract entry to file.\n"); 696 return false; 697 } 698 return true; 699 } 700 701 typedef struct { 702 unsigned char* buffer; 703 long len; 704 } BufferExtractCookie; 705 706 static bool bufferProcessFunction(const unsigned char *data, int dataLen, 707 void *cookie) { 708 BufferExtractCookie *bec = (BufferExtractCookie*)cookie; 709 710 memmove(bec->buffer, data, dataLen); 711 bec->buffer += dataLen; 712 bec->len -= dataLen; 713 714 return true; 715 } 716 717 /* 718 * Uncompress "pEntry" in "pArchive" to buffer, which must be large 719 * enough to hold mzGetZipEntryUncomplen(pEntry) bytes. 720 */ 721 bool mzExtractZipEntryToBuffer(const ZipArchive *pArchive, 722 const ZipEntry *pEntry, unsigned char *buffer) 723 { 724 BufferExtractCookie bec; 725 bec.buffer = buffer; 726 bec.len = mzGetZipEntryUncompLen(pEntry); 727 728 bool ret = mzProcessZipEntryContents(pArchive, pEntry, 729 bufferProcessFunction, (void*)&bec); 730 if (!ret || bec.len != 0) { 731 LOGE("Can't extract entry to memory buffer.\n"); 732 return false; 733 } 734 return true; 735 } 736 737 738 /* Helper state to make path translation easier and less malloc-happy. 739 */ 740 typedef struct { 741 const char *targetDir; 742 const char *zipDir; 743 char *buf; 744 int targetDirLen; 745 int zipDirLen; 746 int bufLen; 747 } MzPathHelper; 748 749 /* Given the values of targetDir and zipDir in the helper, 750 * return the target filename of the provided entry. 751 * The helper must be initialized first. 
752 */ 753 static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry) 754 { 755 int needLen; 756 bool firstTime = (helper->buf == NULL); 757 758 /* target file <-- targetDir + / + entry[zipDirLen:] 759 */ 760 needLen = helper->targetDirLen + 1 + 761 pEntry->fileNameLen - helper->zipDirLen + 1; 762 if (needLen > helper->bufLen) { 763 char *newBuf; 764 765 needLen *= 2; 766 newBuf = (char *)realloc(helper->buf, needLen); 767 if (newBuf == NULL) { 768 return NULL; 769 } 770 helper->buf = newBuf; 771 helper->bufLen = needLen; 772 } 773 774 /* Every path will start with the target path and a slash. 775 */ 776 if (firstTime) { 777 char *p = helper->buf; 778 memcpy(p, helper->targetDir, helper->targetDirLen); 779 p += helper->targetDirLen; 780 if (p == helper->buf || p[-1] != '/') { 781 helper->targetDirLen += 1; 782 *p++ = '/'; 783 } 784 } 785 786 /* Replace the custom part of the path with the appropriate 787 * part of the entry's path. 788 */ 789 char *epath = helper->buf + helper->targetDirLen; 790 memcpy(epath, pEntry->fileName + helper->zipDirLen, 791 pEntry->fileNameLen - helper->zipDirLen); 792 epath += pEntry->fileNameLen - helper->zipDirLen; 793 *epath = '\0'; 794 795 return helper->buf; 796 } 797 798 /* 799 * Inflate all entries under zipDir to the directory specified by 800 * targetDir, which must exist and be a writable directory. 801 * 802 * The immediate children of zipDir will become the immediate 803 * children of targetDir; e.g., if the archive contains the entries 804 * 805 * a/b/c/one 806 * a/b/c/two 807 * a/b/c/d/three 808 * 809 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting 810 * files will be 811 * 812 * /tmp/one 813 * /tmp/two 814 * /tmp/d/three 815 * 816 * Returns true on success, false on failure. 
817 */ 818 bool mzExtractRecursive(const ZipArchive *pArchive, 819 const char *zipDir, const char *targetDir, 820 const struct utimbuf *timestamp, 821 void (*callback)(const char *fn, void *), void *cookie, 822 struct selabel_handle *sehnd) 823 { 824 if (zipDir[0] == '/') { 825 LOGE("mzExtractRecursive(): zipDir must be a relative path.\n"); 826 return false; 827 } 828 if (targetDir[0] != '/') { 829 LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n"); 830 return false; 831 } 832 833 unsigned int zipDirLen; 834 char *zpath; 835 836 zipDirLen = strlen(zipDir); 837 zpath = (char *)malloc(zipDirLen + 2); 838 if (zpath == NULL) { 839 LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2); 840 return false; 841 } 842 /* If zipDir is empty, we'll extract the entire zip file. 843 * Otherwise, canonicalize the path. 844 */ 845 if (zipDirLen > 0) { 846 /* Make sure there's (hopefully, exactly one) slash at the 847 * end of the path. This way we don't need to worry about 848 * accidentally extracting "one/twothree" when a path like 849 * "one/two" is specified. 850 */ 851 memcpy(zpath, zipDir, zipDirLen); 852 if (zpath[zipDirLen-1] != '/') { 853 zpath[zipDirLen++] = '/'; 854 } 855 } 856 zpath[zipDirLen] = '\0'; 857 858 /* Set up the helper structure that we'll use to assemble paths. 859 */ 860 MzPathHelper helper; 861 helper.targetDir = targetDir; 862 helper.targetDirLen = strlen(helper.targetDir); 863 helper.zipDir = zpath; 864 helper.zipDirLen = strlen(helper.zipDir); 865 helper.buf = NULL; 866 helper.bufLen = 0; 867 868 /* Walk through the entries and extract anything whose path begins 869 * with zpath. 870 //TODO: since the entries are sorted, binary search for the first match 871 // and stop after the first non-match. 
872 */ 873 unsigned int i; 874 bool seenMatch = false; 875 int ok = true; 876 int extractCount = 0; 877 for (i = 0; i < pArchive->numEntries; i++) { 878 ZipEntry *pEntry = pArchive->pEntries + i; 879 if (pEntry->fileNameLen < zipDirLen) { 880 //TODO: look out for a single empty directory entry that matches zpath, but 881 // missing the trailing slash. Most zip files seem to include 882 // the trailing slash, but I think it's legal to leave it off. 883 // e.g., zpath "a/b/", entry "a/b", with no children of the entry. 884 /* No chance of matching. 885 */ 886 #if SORT_ENTRIES 887 if (seenMatch) { 888 /* Since the entries are sorted, we can give up 889 * on the first mismatch after the first match. 890 */ 891 break; 892 } 893 #endif 894 continue; 895 } 896 /* If zpath is empty, this strncmp() will match everything, 897 * which is what we want. 898 */ 899 if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) { 900 #if SORT_ENTRIES 901 if (seenMatch) { 902 /* Since the entries are sorted, we can give up 903 * on the first mismatch after the first match. 904 */ 905 break; 906 } 907 #endif 908 continue; 909 } 910 /* This entry begins with zipDir, so we'll extract it. 911 */ 912 seenMatch = true; 913 914 /* Find the target location of the entry. 915 */ 916 const char *targetFile = targetEntryPath(&helper, pEntry); 917 if (targetFile == NULL) { 918 LOGE("Can't assemble target path for \"%.*s\"\n", 919 pEntry->fileNameLen, pEntry->fileName); 920 ok = false; 921 break; 922 } 923 924 #define UNZIP_DIRMODE 0755 925 #define UNZIP_FILEMODE 0644 926 /* 927 * Create the file or directory. We ignore directory entries 928 * because we recursively create paths to each file entry we encounter 929 * in the zip archive anyway. 930 * 931 * NOTE: A "directory entry" in a zip archive is just a zero length 932 * entry that ends in a "/". They're not mandatory and many tools get 933 * rid of them. We need to process them only if we want to preserve 934 * empty directories from the archive. 
935 */ 936 if (pEntry->fileName[pEntry->fileNameLen-1] != '/') { 937 /* This is not a directory. First, make sure that 938 * the containing directory exists. 939 */ 940 int ret = dirCreateHierarchy( 941 targetFile, UNZIP_DIRMODE, timestamp, true, sehnd); 942 if (ret != 0) { 943 LOGE("Can't create containing directory for \"%s\": %s\n", 944 targetFile, strerror(errno)); 945 ok = false; 946 break; 947 } 948 949 /* 950 * The entry is a regular file or a symlink. Open the target for writing. 951 * 952 * TODO: This behavior for symlinks seems rather bizarre. For a 953 * symlink foo/bar/baz -> foo/tar/taz, we will create a file called 954 * "foo/bar/baz" whose contents are the literal "foo/tar/taz". We 955 * warn about this for now and preserve older behavior. 956 */ 957 if (mzIsZipEntrySymlink(pEntry)) { 958 LOGE("Symlink entry \"%.*s\" will be output as a regular file.", 959 pEntry->fileNameLen, pEntry->fileName); 960 } 961 962 char *secontext = NULL; 963 964 if (sehnd) { 965 selabel_lookup(sehnd, &secontext, targetFile, UNZIP_FILEMODE); 966 setfscreatecon(secontext); 967 } 968 969 int fd = open(targetFile, O_CREAT|O_WRONLY|O_TRUNC|O_SYNC, 970 UNZIP_FILEMODE); 971 972 if (secontext) { 973 freecon(secontext); 974 setfscreatecon(NULL); 975 } 976 977 if (fd < 0) { 978 LOGE("Can't create target file \"%s\": %s\n", 979 targetFile, strerror(errno)); 980 ok = false; 981 break; 982 } 983 984 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd); 985 if (ok) { 986 ok = (fsync(fd) == 0); 987 } 988 if (close(fd) != 0) { 989 ok = false; 990 } 991 if (!ok) { 992 LOGE("Error extracting \"%s\"\n", targetFile); 993 ok = false; 994 break; 995 } 996 997 if (timestamp != NULL && utime(targetFile, timestamp)) { 998 LOGE("Error touching \"%s\"\n", targetFile); 999 ok = false; 1000 break; 1001 } 1002 1003 LOGV("Extracted file \"%s\"\n", targetFile); 1004 ++extractCount; 1005 } 1006 1007 if (callback != NULL) callback(targetFile, cookie); 1008 } 1009 1010 LOGV("Extracted %d file(s)\n", 
extractCount); 1011 1012 free(helper.buf); 1013 free(zpath); 1014 1015 return ok; 1016 } 1017