1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /* 18 * Read-only access to Zip archives, with minimal heap allocation. 19 */ 20 21 #define LOG_TAG "ziparchive" 22 23 #include <assert.h> 24 #include <errno.h> 25 #include <fcntl.h> 26 #include <inttypes.h> 27 #include <limits.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <time.h> 31 #include <unistd.h> 32 33 #include <memory> 34 #include <vector> 35 36 #include <android-base/file.h> 37 #include <android-base/logging.h> 38 #include <android-base/macros.h> // TEMP_FAILURE_RETRY may or may not be in unistd 39 #include <android-base/memory.h> 40 #include <log/log.h> 41 #include <utils/Compat.h> 42 #include <utils/FileMap.h> 43 #include "ziparchive/zip_archive.h" 44 #include "zlib.h" 45 46 #include "entry_name_utils-inl.h" 47 #include "zip_archive_common.h" 48 #include "zip_archive_private.h" 49 50 using android::base::get_unaligned; 51 52 // Used to turn on crc checks - verify that the content CRC matches the values 53 // specified in the local file header and the central directory. 54 static const bool kCrcChecksEnabled = false; 55 56 // This is for windows. If we don't open a file in binary mode, weird 57 // things will happen. 58 #ifndef O_BINARY 59 #define O_BINARY 0 60 #endif 61 62 // The maximum number of bytes to scan backwards for the EOCD start. 63 static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord); 64 65 /* 66 * A Read-only Zip archive. 67 * 68 * We want "open" and "find entry by name" to be fast operations, and 69 * we want to use as little memory as possible. We memory-map the zip 70 * central directory, and load a hash table with pointers to the filenames 71 * (which aren't null-terminated). The other fields are at a fixed offset 72 * from the filename, so we don't need to extract those (but we do need 73 * to byte-read and endian-swap them every time we want them). 74 * 75 * It's possible that somebody has handed us a massive (~1GB) zip archive, 76 * so we can't expect to mmap the entire file. 77 * 78 * To speed comparisons when doing a lookup by name, we could make the mapping 79 * "private" (copy-on-write) and null-terminate the filenames after verifying 80 * the record structure. However, this requires a private mapping of 81 * every page that the Central Directory touches. Easier to tuck a copy 82 * of the string length into the hash table entry. 83 */ 84 85 /* 86 * Round up to the next highest power of 2. 87 * 88 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 89 */ 90 static uint32_t RoundUpPower2(uint32_t val) { 91 val--; 92 val |= val >> 1; 93 val |= val >> 2; 94 val |= val >> 4; 95 val |= val >> 8; 96 val |= val >> 16; 97 val++; 98 99 return val; 100 } 101 102 static uint32_t ComputeHash(const ZipString& name) { 103 uint32_t hash = 0; 104 uint16_t len = name.name_length; 105 const uint8_t* str = name.name; 106 107 while (len--) { 108 hash = hash * 31 + *str++; 109 } 110 111 return hash; 112 } 113 114 /* 115 * Convert a ZipEntry to a hash table index, verifying that it's in a 116 * valid range. 117 */ 118 static int64_t EntryToIndex(const ZipString* hash_table, const uint32_t hash_table_size, 119 const ZipString& name) { 120 const uint32_t hash = ComputeHash(name); 121 122 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. 123 uint32_t ent = hash & (hash_table_size - 1); 124 while (hash_table[ent].name != NULL) { 125 if (hash_table[ent] == name) { 126 return ent; 127 } 128 129 ent = (ent + 1) & (hash_table_size - 1); 130 } 131 132 ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name); 133 return kEntryNotFound; 134 } 135 136 /* 137 * Add a new entry to the hash table. 138 */ 139 static int32_t AddToHash(ZipString* hash_table, const uint64_t hash_table_size, 140 const ZipString& name) { 141 const uint64_t hash = ComputeHash(name); 142 uint32_t ent = hash & (hash_table_size - 1); 143 144 /* 145 * We over-allocated the table, so we're guaranteed to find an empty slot. 146 * Further, we guarantee that the hashtable size is not 0. 147 */ 148 while (hash_table[ent].name != NULL) { 149 if (hash_table[ent] == name) { 150 // We've found a duplicate entry. We don't accept it 151 ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name); 152 return kDuplicateEntry; 153 } 154 ent = (ent + 1) & (hash_table_size - 1); 155 } 156 157 hash_table[ent].name = name.name; 158 hash_table[ent].name_length = name.name_length; 159 return 0; 160 } 161 162 static int32_t MapCentralDirectory0(const char* debug_file_name, ZipArchive* archive, 163 off64_t file_length, off64_t read_amount, uint8_t* scan_buffer) { 164 const off64_t search_start = file_length - read_amount; 165 166 if (!archive->mapped_zip.ReadAtOffset(scan_buffer, read_amount, search_start)) { 167 ALOGE("Zip: read %" PRId64 " from offset %" PRId64 " failed", static_cast<int64_t>(read_amount), 168 static_cast<int64_t>(search_start)); 169 return kIoError; 170 } 171 172 /* 173 * Scan backward for the EOCD magic. In an archive without a trailing 174 * comment, we'll find it on the first try. (We may want to consider 175 * doing an initial minimal read; if we don't find it, retry with a 176 * second read as above.) 177 */ 178 int i = read_amount - sizeof(EocdRecord); 179 for (; i >= 0; i--) { 180 if (scan_buffer[i] == 0x50) { 181 uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]); 182 if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) { 183 ALOGV("+++ Found EOCD at buf+%d", i); 184 break; 185 } 186 } 187 } 188 if (i < 0) { 189 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name); 190 return kInvalidFile; 191 } 192 193 const off64_t eocd_offset = search_start + i; 194 const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i); 195 /* 196 * Verify that there's no trailing space at the end of the central directory 197 * and its comment. 198 */ 199 const off64_t calculated_length = eocd_offset + sizeof(EocdRecord) + eocd->comment_length; 200 if (calculated_length != file_length) { 201 ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory", 202 static_cast<int64_t>(file_length - calculated_length)); 203 return kInvalidFile; 204 } 205 206 /* 207 * Grab the CD offset and size, and the number of entries in the 208 * archive and verify that they look reasonable. 209 */ 210 if (static_cast<off64_t>(eocd->cd_start_offset) + eocd->cd_size > eocd_offset) { 211 ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")", 212 eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset)); 213 #if defined(__ANDROID__) 214 if (eocd->cd_start_offset + eocd->cd_size <= eocd_offset) { 215 android_errorWriteLog(0x534e4554, "31251826"); 216 } 217 #endif 218 return kInvalidOffset; 219 } 220 if (eocd->num_records == 0) { 221 ALOGW("Zip: empty archive?"); 222 return kEmptyArchive; 223 } 224 225 ALOGV("+++ num_entries=%" PRIu32 " dir_size=%" PRIu32 " dir_offset=%" PRIu32, eocd->num_records, 226 eocd->cd_size, eocd->cd_start_offset); 227 228 /* 229 * It all looks good. Create a mapping for the CD, and set the fields 230 * in archive. 231 */ 232 233 if (!archive->InitializeCentralDirectory(debug_file_name, 234 static_cast<off64_t>(eocd->cd_start_offset), 235 static_cast<size_t>(eocd->cd_size))) { 236 ALOGE("Zip: failed to intialize central directory.\n"); 237 return kMmapFailed; 238 } 239 240 archive->num_entries = eocd->num_records; 241 archive->directory_offset = eocd->cd_start_offset; 242 243 return 0; 244 } 245 246 /* 247 * Find the zip Central Directory and memory-map it. 248 * 249 * On success, returns 0 after populating fields from the EOCD area: 250 * directory_offset 251 * directory_ptr 252 * num_entries 253 */ 254 static int32_t MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) { 255 // Test file length. We use lseek64 to make sure the file 256 // is small enough to be a zip file (Its size must be less than 257 // 0xffffffff bytes). 258 off64_t file_length = archive->mapped_zip.GetFileLength(); 259 if (file_length == -1) { 260 return kInvalidFile; 261 } 262 263 if (file_length > static_cast<off64_t>(0xffffffff)) { 264 ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length)); 265 return kInvalidFile; 266 } 267 268 if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) { 269 ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length)); 270 return kInvalidFile; 271 } 272 273 /* 274 * Perform the traditional EOCD snipe hunt. 275 * 276 * We're searching for the End of Central Directory magic number, 277 * which appears at the start of the EOCD block. It's followed by 278 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We 279 * need to read the last part of the file into a buffer, dig through 280 * it to find the magic number, parse some values out, and use those 281 * to determine the extent of the CD. 282 * 283 * We start by pulling in the last part of the file. 284 */ 285 off64_t read_amount = kMaxEOCDSearch; 286 if (file_length < read_amount) { 287 read_amount = file_length; 288 } 289 290 std::vector<uint8_t> scan_buffer(read_amount); 291 int32_t result = 292 MapCentralDirectory0(debug_file_name, archive, file_length, read_amount, scan_buffer.data()); 293 return result; 294 } 295 296 /* 297 * Parses the Zip archive's Central Directory. Allocates and populates the 298 * hash table. 299 * 300 * Returns 0 on success. 301 */ 302 static int32_t ParseZipArchive(ZipArchive* archive) { 303 const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr(); 304 const size_t cd_length = archive->central_directory.GetMapLength(); 305 const uint16_t num_entries = archive->num_entries; 306 307 /* 308 * Create hash table. We have a minimum 75% load factor, possibly as 309 * low as 50% after we round off to a power of 2. There must be at 310 * least one unused entry to avoid an infinite loop during creation. 311 */ 312 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3); 313 archive->hash_table = 314 reinterpret_cast<ZipString*>(calloc(archive->hash_table_size, sizeof(ZipString))); 315 if (archive->hash_table == nullptr) { 316 ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu", 317 archive->hash_table_size, sizeof(ZipString)); 318 return -1; 319 } 320 321 /* 322 * Walk through the central directory, adding entries to the hash 323 * table and verifying values. 324 */ 325 const uint8_t* const cd_end = cd_ptr + cd_length; 326 const uint8_t* ptr = cd_ptr; 327 for (uint16_t i = 0; i < num_entries; i++) { 328 if (ptr > cd_end - sizeof(CentralDirectoryRecord)) { 329 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i); 330 #if defined(__ANDROID__) 331 android_errorWriteLog(0x534e4554, "36392138"); 332 #endif 333 return -1; 334 } 335 336 const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr); 337 if (cdr->record_signature != CentralDirectoryRecord::kSignature) { 338 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i); 339 return -1; 340 } 341 342 const off64_t local_header_offset = cdr->local_file_header_offset; 343 if (local_header_offset >= archive->directory_offset) { 344 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, 345 static_cast<int64_t>(local_header_offset), i); 346 return -1; 347 } 348 349 const uint16_t file_name_length = cdr->file_name_length; 350 const uint16_t extra_length = cdr->extra_field_length; 351 const uint16_t comment_length = cdr->comment_length; 352 const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord); 353 354 if (file_name + file_name_length > cd_end) { 355 ALOGW( 356 "Zip: file name boundary exceeds the central directory range, file_name_length: " 357 "%" PRIx16 ", cd_length: %zu", 358 file_name_length, cd_length); 359 return -1; 360 } 361 /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */ 362 if (!IsValidEntryName(file_name, file_name_length)) { 363 return -1; 364 } 365 366 /* add the CDE filename to the hash table */ 367 ZipString entry_name; 368 entry_name.name = file_name; 369 entry_name.name_length = file_name_length; 370 const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name); 371 if (add_result != 0) { 372 ALOGW("Zip: Error adding entry to hash table %d", add_result); 373 return add_result; 374 } 375 376 ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length; 377 if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) { 378 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16, ptr - cd_ptr, cd_length, i); 379 return -1; 380 } 381 } 382 383 uint32_t lfh_start_bytes; 384 if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&lfh_start_bytes), 385 sizeof(uint32_t), 0)) { 386 ALOGW("Zip: Unable to read header for entry at offset == 0."); 387 return -1; 388 } 389 390 if (lfh_start_bytes != LocalFileHeader::kSignature) { 391 ALOGW("Zip: Entry at offset zero has invalid LFH signature %" PRIx32, lfh_start_bytes); 392 #if defined(__ANDROID__) 393 android_errorWriteLog(0x534e4554, "64211847"); 394 #endif 395 return -1; 396 } 397 398 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries); 399 400 return 0; 401 } 402 403 static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) { 404 int32_t result = -1; 405 if ((result = MapCentralDirectory(debug_file_name, archive)) != 0) { 406 return result; 407 } 408 409 if ((result = ParseZipArchive(archive))) { 410 return result; 411 } 412 413 return 0; 414 } 415 416 int32_t OpenArchiveFd(int fd, const char* debug_file_name, ZipArchiveHandle* handle, 417 bool assume_ownership) { 418 ZipArchive* archive = new ZipArchive(fd, assume_ownership); 419 *handle = archive; 420 return OpenArchiveInternal(archive, debug_file_name); 421 } 422 423 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) { 424 const int fd = open(fileName, O_RDONLY | O_BINARY, 0); 425 ZipArchive* archive = new ZipArchive(fd, true); 426 *handle = archive; 427 428 if (fd < 0) { 429 ALOGW("Unable to open '%s': %s", fileName, strerror(errno)); 430 return kIoError; 431 } 432 433 return OpenArchiveInternal(archive, fileName); 434 } 435 436 int32_t OpenArchiveFromMemory(void* address, size_t length, const char* debug_file_name, 437 ZipArchiveHandle* handle) { 438 ZipArchive* archive = new ZipArchive(address, length); 439 *handle = archive; 440 return OpenArchiveInternal(archive, debug_file_name); 441 } 442 443 /* 444 * Close a ZipArchive, closing the file and freeing the contents. 445 */ 446 void CloseArchive(ZipArchiveHandle handle) { 447 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle); 448 ALOGV("Closing archive %p", archive); 449 delete archive; 450 } 451 452 static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) { 453 uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)]; 454 if (!mapped_zip.ReadData(ddBuf, sizeof(ddBuf))) { 455 return kIoError; 456 } 457 458 const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf)); 459 const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0; 460 const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset); 461 462 // Validate that the values in the data descriptor match those in the central 463 // directory. 464 if (entry->compressed_length != descriptor->compressed_size || 465 entry->uncompressed_length != descriptor->uncompressed_size || 466 entry->crc32 != descriptor->crc32) { 467 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32 468 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}", 469 entry->compressed_length, entry->uncompressed_length, entry->crc32, 470 descriptor->compressed_size, descriptor->uncompressed_size, descriptor->crc32); 471 return kInconsistentInformation; 472 } 473 474 return 0; 475 } 476 477 static int32_t FindEntry(const ZipArchive* archive, const int ent, ZipEntry* data) { 478 const uint16_t nameLen = archive->hash_table[ent].name_length; 479 480 // Recover the start of the central directory entry from the filename 481 // pointer. The filename is the first entry past the fixed-size data, 482 // so we can just subtract back from that. 483 const uint8_t* ptr = archive->hash_table[ent].name; 484 ptr -= sizeof(CentralDirectoryRecord); 485 486 // This is the base of our mmapped region, we have to sanity check that 487 // the name that's in the hash table is a pointer to a location within 488 // this mapped region. 489 const uint8_t* base_ptr = archive->central_directory.GetBasePtr(); 490 if (ptr < base_ptr || ptr > base_ptr + archive->central_directory.GetMapLength()) { 491 ALOGW("Zip: Invalid entry pointer"); 492 return kInvalidOffset; 493 } 494 495 const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr); 496 497 // The offset of the start of the central directory in the zipfile. 498 // We keep this lying around so that we can sanity check all our lengths 499 // and our per-file structures. 500 const off64_t cd_offset = archive->directory_offset; 501 502 // Fill out the compression method, modification time, crc32 503 // and other interesting attributes from the central directory. These 504 // will later be compared against values from the local file header. 505 data->method = cdr->compression_method; 506 data->mod_time = cdr->last_mod_date << 16 | cdr->last_mod_time; 507 data->crc32 = cdr->crc32; 508 data->compressed_length = cdr->compressed_size; 509 data->uncompressed_length = cdr->uncompressed_size; 510 511 // Figure out the local header offset from the central directory. The 512 // actual file data will begin after the local header and the name / 513 // extra comments. 514 const off64_t local_header_offset = cdr->local_file_header_offset; 515 if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) { 516 ALOGW("Zip: bad local hdr offset in zip"); 517 return kInvalidOffset; 518 } 519 520 uint8_t lfh_buf[sizeof(LocalFileHeader)]; 521 if (!archive->mapped_zip.ReadAtOffset(lfh_buf, sizeof(lfh_buf), local_header_offset)) { 522 ALOGW("Zip: failed reading lfh name from offset %" PRId64, 523 static_cast<int64_t>(local_header_offset)); 524 return kIoError; 525 } 526 527 const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf); 528 529 if (lfh->lfh_signature != LocalFileHeader::kSignature) { 530 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64, 531 static_cast<int64_t>(local_header_offset)); 532 return kInvalidOffset; 533 } 534 535 // Paranoia: Match the values specified in the local file header 536 // to those specified in the central directory. 537 538 // Warn if central directory and local file header don't agree on the use 539 // of a trailing Data Descriptor. The reference implementation is inconsistent 540 // and appears to use the LFH value during extraction (unzip) but the CD value 541 // while displayng information about archives (zipinfo). The spec remains 542 // silent on this inconsistency as well. 543 // 544 // For now, always use the version from the LFH but make sure that the values 545 // specified in the central directory match those in the data descriptor. 546 // 547 // NOTE: It's also worth noting that unzip *does* warn about inconsistencies in 548 // bit 11 (EFS: The language encoding flag, marking that filename and comment are 549 // encoded using UTF-8). This implementation does not check for the presence of 550 // that flag and always enforces that entry names are valid UTF-8. 551 if ((lfh->gpb_flags & kGPBDDFlagMask) != (cdr->gpb_flags & kGPBDDFlagMask)) { 552 ALOGW("Zip: gpb flag mismatch at bit 3. expected {%04" PRIx16 "}, was {%04" PRIx16 "}", 553 cdr->gpb_flags, lfh->gpb_flags); 554 } 555 556 // If there is no trailing data descriptor, verify that the central directory and local file 557 // header agree on the crc, compressed, and uncompressed sizes of the entry. 558 if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) { 559 data->has_data_descriptor = 0; 560 if (data->compressed_length != lfh->compressed_size || 561 data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) { 562 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32 563 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}", 564 data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size, 565 lfh->uncompressed_size, lfh->crc32); 566 return kInconsistentInformation; 567 } 568 } else { 569 data->has_data_descriptor = 1; 570 } 571 572 // 4.4.2.1: the upper byte of `version_made_by` gives the source OS. Unix is 3. 573 if ((cdr->version_made_by >> 8) == 3) { 574 data->unix_mode = (cdr->external_file_attributes >> 16) & 0xffff; 575 } else { 576 data->unix_mode = 0777; 577 } 578 579 // Check that the local file header name matches the declared 580 // name in the central directory. 581 if (lfh->file_name_length == nameLen) { 582 const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); 583 if (name_offset + lfh->file_name_length > cd_offset) { 584 ALOGW("Zip: Invalid declared length"); 585 return kInvalidOffset; 586 } 587 588 std::vector<uint8_t> name_buf(nameLen); 589 if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { 590 ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset)); 591 return kIoError; 592 } 593 594 if (memcmp(archive->hash_table[ent].name, name_buf.data(), nameLen)) { 595 return kInconsistentInformation; 596 } 597 598 } else { 599 ALOGW("Zip: lfh name did not match central directory."); 600 return kInconsistentInformation; 601 } 602 603 const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) + 604 lfh->file_name_length + lfh->extra_field_length; 605 if (data_offset > cd_offset) { 606 ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset)); 607 return kInvalidOffset; 608 } 609 610 if (static_cast<off64_t>(data_offset + data->compressed_length) > cd_offset) { 611 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")", 612 static_cast<int64_t>(data_offset), data->compressed_length, 613 static_cast<int64_t>(cd_offset)); 614 return kInvalidOffset; 615 } 616 617 if (data->method == kCompressStored && 618 static_cast<off64_t>(data_offset + data->uncompressed_length) > cd_offset) { 619 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")", 620 static_cast<int64_t>(data_offset), data->uncompressed_length, 621 static_cast<int64_t>(cd_offset)); 622 return kInvalidOffset; 623 } 624 625 data->offset = data_offset; 626 return 0; 627 } 628 629 struct IterationHandle { 630 uint32_t position; 631 // We're not using vector here because this code is used in the Windows SDK 632 // where the STL is not available. 633 ZipString prefix; 634 ZipString suffix; 635 ZipArchive* archive; 636 637 IterationHandle(const ZipString* in_prefix, const ZipString* in_suffix) { 638 if (in_prefix) { 639 uint8_t* name_copy = new uint8_t[in_prefix->name_length]; 640 memcpy(name_copy, in_prefix->name, in_prefix->name_length); 641 prefix.name = name_copy; 642 prefix.name_length = in_prefix->name_length; 643 } else { 644 prefix.name = NULL; 645 prefix.name_length = 0; 646 } 647 if (in_suffix) { 648 uint8_t* name_copy = new uint8_t[in_suffix->name_length]; 649 memcpy(name_copy, in_suffix->name, in_suffix->name_length); 650 suffix.name = name_copy; 651 suffix.name_length = in_suffix->name_length; 652 } else { 653 suffix.name = NULL; 654 suffix.name_length = 0; 655 } 656 } 657 658 ~IterationHandle() { 659 delete[] prefix.name; 660 delete[] suffix.name; 661 } 662 }; 663 664 int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const ZipString* optional_prefix, 665 const ZipString* optional_suffix) { 666 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle); 667 668 if (archive == NULL || archive->hash_table == NULL) { 669 ALOGW("Zip: Invalid ZipArchiveHandle"); 670 return kInvalidHandle; 671 } 672 673 IterationHandle* cookie = new IterationHandle(optional_prefix, optional_suffix); 674 cookie->position = 0; 675 cookie->archive = archive; 676 677 *cookie_ptr = cookie; 678 return 0; 679 } 680 681 void EndIteration(void* cookie) { 682 delete reinterpret_cast<IterationHandle*>(cookie); 683 } 684 685 int32_t FindEntry(const ZipArchiveHandle handle, const ZipString& entryName, ZipEntry* data) { 686 const ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle); 687 if (entryName.name_length == 0) { 688 ALOGW("Zip: Invalid filename %.*s", entryName.name_length, entryName.name); 689 return kInvalidEntryName; 690 } 691 692 const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName); 693 694 if (ent < 0) { 695 ALOGV("Zip: Could not find entry %.*s", entryName.name_length, entryName.name); 696 return ent; 697 } 698 699 return FindEntry(archive, ent, data); 700 } 701 702 int32_t Next(void* cookie, ZipEntry* data, ZipString* name) { 703 IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie); 704 if (handle == NULL) { 705 return kInvalidHandle; 706 } 707 708 ZipArchive* archive = handle->archive; 709 if (archive == NULL || archive->hash_table == NULL) { 710 ALOGW("Zip: Invalid ZipArchiveHandle"); 711 return kInvalidHandle; 712 } 713 714 const uint32_t currentOffset = handle->position; 715 const uint32_t hash_table_length = archive->hash_table_size; 716 const ZipString* hash_table = archive->hash_table; 717 718 for (uint32_t i = currentOffset; i < hash_table_length; ++i) { 719 if (hash_table[i].name != NULL && 720 (handle->prefix.name_length == 0 || hash_table[i].StartsWith(handle->prefix)) && 721 (handle->suffix.name_length == 0 || hash_table[i].EndsWith(handle->suffix))) { 722 handle->position = (i + 1); 723 const int error = FindEntry(archive, i, data); 724 if (!error) { 725 name->name = hash_table[i].name; 726 name->name_length = hash_table[i].name_length; 727 } 728 729 return error; 730 } 731 } 732 733 handle->position = 0; 734 return kIterationEnd; 735 } 736 737 class Writer { 738 public: 739 virtual bool Append(uint8_t* buf, size_t buf_size) = 0; 740 virtual ~Writer() {} 741 742 protected: 743 Writer() = default; 744 745 private: 746 DISALLOW_COPY_AND_ASSIGN(Writer); 747 }; 748 749 // A Writer that writes data to a fixed size memory region. 750 // The size of the memory region must be equal to the total size of 751 // the data appended to it. 752 class MemoryWriter : public Writer { 753 public: 754 MemoryWriter(uint8_t* buf, size_t size) : Writer(), buf_(buf), size_(size), bytes_written_(0) {} 755 756 virtual bool Append(uint8_t* buf, size_t buf_size) override { 757 if (bytes_written_ + buf_size > size_) { 758 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)", size_, 759 bytes_written_ + buf_size); 760 return false; 761 } 762 763 memcpy(buf_ + bytes_written_, buf, buf_size); 764 bytes_written_ += buf_size; 765 return true; 766 } 767 768 private: 769 uint8_t* const buf_; 770 const size_t size_; 771 size_t bytes_written_; 772 }; 773 774 // A Writer that appends data to a file |fd| at its current position. 775 // The file will be truncated to the end of the written data. 776 class FileWriter : public Writer { 777 public: 778 // Creates a FileWriter for |fd| and prepare to write |entry| to it, 779 // guaranteeing that the file descriptor is valid and that there's enough 780 // space on the volume to write out the entry completely and that the file 781 // is truncated to the correct length (no truncation if |fd| references a 782 // block device). 783 // 784 // Returns a valid FileWriter on success, |nullptr| if an error occurred. 785 static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) { 786 const uint32_t declared_length = entry->uncompressed_length; 787 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR); 788 if (current_offset == -1) { 789 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno)); 790 return nullptr; 791 } 792 793 int result = 0; 794 #if defined(__linux__) 795 if (declared_length > 0) { 796 // Make sure we have enough space on the volume to extract the compressed 797 // entry. Note that the call to ftruncate below will change the file size but 798 // will not allocate space on disk and this call to fallocate will not 799 // change the file size. 800 // Note: fallocate is only supported by the following filesystems - 801 // btrfs, ext4, ocfs2, and xfs. Therefore fallocate might fail with 802 // EOPNOTSUPP error when issued in other filesystems. 803 // Hence, check for the return error code before concluding that the 804 // disk does not have enough space. 805 result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length)); 806 if (result == -1 && errno == ENOSPC) { 807 ALOGW("Zip: unable to allocate %" PRId64 " bytes at offset %" PRId64 " : %s", 808 static_cast<int64_t>(declared_length), static_cast<int64_t>(current_offset), 809 strerror(errno)); 810 return std::unique_ptr<FileWriter>(nullptr); 811 } 812 } 813 #endif // __linux__ 814 815 struct stat sb; 816 if (fstat(fd, &sb) == -1) { 817 ALOGW("Zip: unable to fstat file: %s", strerror(errno)); 818 return std::unique_ptr<FileWriter>(nullptr); 819 } 820 821 // Block device doesn't support ftruncate(2). 822 if (!S_ISBLK(sb.st_mode)) { 823 result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset)); 824 if (result == -1) { 825 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s", 826 static_cast<int64_t>(declared_length + current_offset), strerror(errno)); 827 return std::unique_ptr<FileWriter>(nullptr); 828 } 829 } 830 831 return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length)); 832 } 833 834 virtual bool Append(uint8_t* buf, size_t buf_size) override { 835 if (total_bytes_written_ + buf_size > declared_length_) { 836 ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)", declared_length_, 837 total_bytes_written_ + buf_size); 838 return false; 839 } 840 841 const bool result = android::base::WriteFully(fd_, buf, buf_size); 842 if (result) { 843 total_bytes_written_ += buf_size; 844 } else { 845 ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno)); 846 } 847 848 return result; 849 } 850 851 private: 852 FileWriter(const int fd, const size_t declared_length) 853 : Writer(), fd_(fd), declared_length_(declared_length), total_bytes_written_(0) {} 854 855 const int fd_; 856 const size_t declared_length_; 857 size_t total_bytes_written_; 858 }; 859 860 // This method is using libz macros with old-style-casts 861 #pragma GCC diagnostic push 862 #pragma GCC diagnostic ignored "-Wold-style-cast" 863 static inline int zlib_inflateInit2(z_stream* stream, int window_bits) { 864 return inflateInit2(stream, window_bits); 865 } 866 #pragma GCC diagnostic pop 867 868 static int32_t InflateEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry* entry, 869 Writer* writer, uint64_t* crc_out) { 870 const size_t kBufSize = 32768; 871 std::vector<uint8_t> read_buf(kBufSize); 872 std::vector<uint8_t> write_buf(kBufSize); 873 z_stream zstream; 874 int zerr; 875 876 /* 877 * Initialize the zlib stream struct. 878 */ 879 memset(&zstream, 0, sizeof(zstream)); 880 zstream.zalloc = Z_NULL; 881 zstream.zfree = Z_NULL; 882 zstream.opaque = Z_NULL; 883 zstream.next_in = NULL; 884 zstream.avail_in = 0; 885 zstream.next_out = &write_buf[0]; 886 zstream.avail_out = kBufSize; 887 zstream.data_type = Z_UNKNOWN; 888 889 /* 890 * Use the undocumented "negative window bits" feature to tell zlib 891 * that there's no zlib header waiting for it. 892 */ 893 zerr = zlib_inflateInit2(&zstream, -MAX_WBITS); 894 if (zerr != Z_OK) { 895 if (zerr == Z_VERSION_ERROR) { 896 ALOGE("Installed zlib is not compatible with linked version (%s)", ZLIB_VERSION); 897 } else { 898 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr); 899 } 900 901 return kZlibError; 902 } 903 904 auto zstream_deleter = [](z_stream* stream) { 905 inflateEnd(stream); /* free up any allocated structures */ 906 }; 907 908 std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter); 909 910 const uint32_t uncompressed_length = entry->uncompressed_length; 911 912 uint64_t crc = 0; 913 uint32_t compressed_length = entry->compressed_length; 914 do { 915 /* read as much as we can */ 916 if (zstream.avail_in == 0) { 917 const size_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length; 918 if (!mapped_zip.ReadData(read_buf.data(), getSize)) { 919 ALOGW("Zip: inflate read failed, getSize = %zu: %s", getSize, strerror(errno)); 920 return kIoError; 921 } 922 923 compressed_length -= getSize; 924 925 zstream.next_in = &read_buf[0]; 926 zstream.avail_in = getSize; 927 } 928 929 /* uncompress the data */ 930 zerr = inflate(&zstream, Z_NO_FLUSH); 931 if (zerr != Z_OK && zerr != Z_STREAM_END) { 932 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", zerr, zstream.next_in, 933 zstream.avail_in, zstream.next_out, zstream.avail_out); 934 return kZlibError; 935 } 936 937 /* write when we're full or when we're done */ 938 if (zstream.avail_out == 0 || (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { 939 const size_t write_size = zstream.next_out - &write_buf[0]; 940 if (!writer->Append(&write_buf[0], write_size)) { 941 // The file might have declared a bogus length. 942 return kInconsistentInformation; 943 } else { 944 crc = crc32(crc, &write_buf[0], write_size); 945 } 946 947 zstream.next_out = &write_buf[0]; 948 zstream.avail_out = kBufSize; 949 } 950 } while (zerr == Z_OK); 951 952 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 953 954 // NOTE: zstream.adler is always set to 0, because we're using the -MAX_WBITS 955 // "feature" of zlib to tell it there won't be a zlib file header. zlib 956 // doesn't bother calculating the checksum in that scenario. We just do 957 // it ourselves above because there are no additional gains to be made by 958 // having zlib calculate it for us, since they do it by calling crc32 in 959 // the same manner that we have above. 960 *crc_out = crc; 961 962 if (zstream.total_out != uncompressed_length || compressed_length != 0) { 963 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")", zstream.total_out, 964 uncompressed_length); 965 return kInconsistentInformation; 966 } 967 968 return 0; 969 } 970 971 static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry* entry, Writer* writer, 972 uint64_t* crc_out) { 973 static const uint32_t kBufSize = 32768; 974 std::vector<uint8_t> buf(kBufSize); 975 976 const uint32_t length = entry->uncompressed_length; 977 uint32_t count = 0; 978 uint64_t crc = 0; 979 while (count < length) { 980 uint32_t remaining = length - count; 981 982 // Safe conversion because kBufSize is narrow enough for a 32 bit signed 983 // value. 984 const size_t block_size = (remaining > kBufSize) ? kBufSize : remaining; 985 if (!mapped_zip.ReadData(buf.data(), block_size)) { 986 ALOGW("CopyFileToFile: copy read failed, block_size = %zu: %s", block_size, strerror(errno)); 987 return kIoError; 988 } 989 990 if (!writer->Append(&buf[0], block_size)) { 991 return kIoError; 992 } 993 crc = crc32(crc, &buf[0], block_size); 994 count += block_size; 995 } 996 997 *crc_out = crc; 998 999 return 0; 1000 } 1001 1002 int32_t ExtractToWriter(ZipArchiveHandle handle, ZipEntry* entry, Writer* writer) { 1003 ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle); 1004 const uint16_t method = entry->method; 1005 off64_t data_offset = entry->offset; 1006 1007 if (!archive->mapped_zip.SeekToOffset(data_offset)) { 1008 ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset)); 1009 return kIoError; 1010 } 1011 1012 // this should default to kUnknownCompressionMethod. 1013 int32_t return_value = -1; 1014 uint64_t crc = 0; 1015 if (method == kCompressStored) { 1016 return_value = CopyEntryToWriter(archive->mapped_zip, entry, writer, &crc); 1017 } else if (method == kCompressDeflated) { 1018 return_value = InflateEntryToWriter(archive->mapped_zip, entry, writer, &crc); 1019 } 1020 1021 if (!return_value && entry->has_data_descriptor) { 1022 return_value = ValidateDataDescriptor(archive->mapped_zip, entry); 1023 if (return_value) { 1024 return return_value; 1025 } 1026 } 1027 1028 // Validate that the CRC matches the calculated value. 1029 if (kCrcChecksEnabled && (entry->crc32 != static_cast<uint32_t>(crc))) { 1030 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc); 1031 return kInconsistentInformation; 1032 } 1033 1034 return return_value; 1035 } 1036 1037 int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry, uint8_t* begin, uint32_t size) { 1038 std::unique_ptr<Writer> writer(new MemoryWriter(begin, size)); 1039 return ExtractToWriter(handle, entry, writer.get()); 1040 } 1041 1042 int32_t ExtractEntryToFile(ZipArchiveHandle handle, ZipEntry* entry, int fd) { 1043 std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry)); 1044 if (writer.get() == nullptr) { 1045 return kIoError; 1046 } 1047 1048 return ExtractToWriter(handle, entry, writer.get()); 1049 } 1050 1051 const char* ErrorCodeString(int32_t error_code) { 1052 // Make sure that the number of entries in kErrorMessages and ErrorCodes 1053 // match. 1054 static_assert((-kLastErrorCode + 1) == arraysize(kErrorMessages), 1055 "(-kLastErrorCode + 1) != arraysize(kErrorMessages)"); 1056 1057 const uint32_t idx = -error_code; 1058 if (idx < arraysize(kErrorMessages)) { 1059 return kErrorMessages[idx]; 1060 } 1061 1062 return "Unknown return code"; 1063 } 1064 1065 int GetFileDescriptor(const ZipArchiveHandle handle) { 1066 return reinterpret_cast<ZipArchive*>(handle)->mapped_zip.GetFileDescriptor(); 1067 } 1068 1069 ZipString::ZipString(const char* entry_name) : name(reinterpret_cast<const uint8_t*>(entry_name)) { 1070 size_t len = strlen(entry_name); 1071 CHECK_LE(len, static_cast<size_t>(UINT16_MAX)); 1072 name_length = static_cast<uint16_t>(len); 1073 } 1074 1075 #if !defined(_WIN32) 1076 class ProcessWriter : public Writer { 1077 public: 1078 ProcessWriter(ProcessZipEntryFunction func, void* cookie) 1079 : Writer(), proc_function_(func), cookie_(cookie) {} 1080 1081 virtual bool Append(uint8_t* buf, size_t buf_size) override { 1082 return proc_function_(buf, buf_size, cookie_); 1083 } 1084 1085 private: 1086 ProcessZipEntryFunction proc_function_; 1087 void* cookie_; 1088 }; 1089 1090 int32_t ProcessZipEntryContents(ZipArchiveHandle handle, ZipEntry* entry, 1091 ProcessZipEntryFunction func, void* cookie) { 1092 ProcessWriter writer(func, cookie); 1093 return ExtractToWriter(handle, entry, &writer); 1094 } 1095 1096 #endif //! defined(_WIN32) 1097 1098 int MappedZipFile::GetFileDescriptor() const { 1099 if (!has_fd_) { 1100 ALOGW("Zip: MappedZipFile doesn't have a file descriptor."); 1101 return -1; 1102 } 1103 return fd_; 1104 } 1105 1106 void* MappedZipFile::GetBasePtr() const { 1107 if (has_fd_) { 1108 ALOGW("Zip: MappedZipFile doesn't have a base pointer."); 1109 return nullptr; 1110 } 1111 return base_ptr_; 1112 } 1113 1114 off64_t MappedZipFile::GetFileLength() const { 1115 if (has_fd_) { 1116 off64_t result = lseek64(fd_, 0, SEEK_END); 1117 if (result == -1) { 1118 ALOGE("Zip: lseek on fd %d failed: %s", fd_, strerror(errno)); 1119 } 1120 return result; 1121 } else { 1122 if (base_ptr_ == nullptr) { 1123 ALOGE("Zip: invalid file map\n"); 1124 return -1; 1125 } 1126 return static_cast<off64_t>(data_length_); 1127 } 1128 } 1129 1130 bool MappedZipFile::SeekToOffset(off64_t offset) { 1131 if (has_fd_) { 1132 if (lseek64(fd_, offset, SEEK_SET) != offset) { 1133 ALOGE("Zip: lseek to %" PRId64 " failed: %s\n", offset, strerror(errno)); 1134 return false; 1135 } 1136 return true; 1137 } else { 1138 if (offset < 0 || offset > static_cast<off64_t>(data_length_)) { 1139 ALOGE("Zip: invalid offset: %" PRId64 ", data length: %" PRId64 "\n", offset, data_length_); 1140 return false; 1141 } 1142 1143 read_pos_ = offset; 1144 return true; 1145 } 1146 } 1147 1148 bool MappedZipFile::ReadData(uint8_t* buffer, size_t read_amount) { 1149 if (has_fd_) { 1150 if (!android::base::ReadFully(fd_, buffer, read_amount)) { 1151 ALOGE("Zip: read from %d failed\n", fd_); 1152 return false; 1153 } 1154 } else { 1155 memcpy(buffer, static_cast<uint8_t*>(base_ptr_) + read_pos_, read_amount); 1156 read_pos_ += read_amount; 1157 } 1158 return true; 1159 } 1160 1161 // Attempts to read |len| bytes into |buf| at offset |off|. 1162 bool MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) { 1163 #if !defined(_WIN32) 1164 if (has_fd_) { 1165 if (static_cast<size_t>(TEMP_FAILURE_RETRY(pread64(fd_, buf, len, off))) != len) { 1166 ALOGE("Zip: failed to read at offset %" PRId64 "\n", off); 1167 return false; 1168 } 1169 return true; 1170 } 1171 #endif 1172 if (!SeekToOffset(off)) { 1173 return false; 1174 } 1175 return ReadData(buf, len); 1176 } 1177 1178 void CentralDirectory::Initialize(void* map_base_ptr, off64_t cd_start_offset, size_t cd_size) { 1179 base_ptr_ = static_cast<uint8_t*>(map_base_ptr) + cd_start_offset; 1180 length_ = cd_size; 1181 } 1182 1183 bool ZipArchive::InitializeCentralDirectory(const char* debug_file_name, off64_t cd_start_offset, 1184 size_t cd_size) { 1185 if (mapped_zip.HasFd()) { 1186 if (!directory_map->create(debug_file_name, mapped_zip.GetFileDescriptor(), cd_start_offset, 1187 cd_size, true /* read only */)) { 1188 return false; 1189 } 1190 1191 CHECK_EQ(directory_map->getDataLength(), cd_size); 1192 central_directory.Initialize(directory_map->getDataPtr(), 0 /*offset*/, cd_size); 1193 } else { 1194 if (mapped_zip.GetBasePtr() == nullptr) { 1195 ALOGE("Zip: Failed to map central directory, bad mapped_zip base pointer\n"); 1196 return false; 1197 } 1198 if (static_cast<off64_t>(cd_start_offset) + static_cast<off64_t>(cd_size) > 1199 mapped_zip.GetFileLength()) { 1200 ALOGE( 1201 "Zip: Failed to map central directory, offset exceeds mapped memory region (" 1202 "start_offset %" PRId64 ", cd_size %zu, mapped_region_size %" PRId64 ")", 1203 static_cast<int64_t>(cd_start_offset), cd_size, mapped_zip.GetFileLength()); 1204 return false; 1205 } 1206 1207 central_directory.Initialize(mapped_zip.GetBasePtr(), cd_start_offset, cd_size); 1208 } 1209 return true; 1210 } 1211 1212 tm ZipEntry::GetModificationTime() const { 1213 tm t = {}; 1214 1215 t.tm_hour = (mod_time >> 11) & 0x1f; 1216 t.tm_min = (mod_time >> 5) & 0x3f; 1217 t.tm_sec = (mod_time & 0x1f) << 1; 1218 1219 t.tm_year = ((mod_time >> 25) & 0x7f) + 80; 1220 t.tm_mon = ((mod_time >> 21) & 0xf) - 1; 1221 t.tm_mday = (mod_time >> 16) & 0x1f; 1222 1223 return t; 1224 } 1225