1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "zip_archive.h" 18 19 #include <vector> 20 21 #include <fcntl.h> 22 #include <sys/stat.h> 23 #include <sys/types.h> 24 #include <unistd.h> 25 26 #include "base/unix_file/fd_file.h" 27 #include "UniquePtr.h" 28 29 namespace art { 30 31 static const size_t kBufSize = 32 * KB; 32 33 // Get 2 little-endian bytes. 34 static uint32_t Le16ToHost(const byte* src) { 35 return ((src[0] << 0) | 36 (src[1] << 8)); 37 } 38 39 // Get 4 little-endian bytes. 40 static uint32_t Le32ToHost(const byte* src) { 41 return ((src[0] << 0) | 42 (src[1] << 8) | 43 (src[2] << 16) | 44 (src[3] << 24)); 45 } 46 47 uint16_t ZipEntry::GetCompressionMethod() { 48 return Le16ToHost(ptr_ + ZipArchive::kCDEMethod); 49 } 50 51 uint32_t ZipEntry::GetCompressedLength() { 52 return Le32ToHost(ptr_ + ZipArchive::kCDECompLen); 53 } 54 55 uint32_t ZipEntry::GetUncompressedLength() { 56 return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen); 57 } 58 59 uint32_t ZipEntry::GetCrc32() { 60 return Le32ToHost(ptr_ + ZipArchive::kCDECRC); 61 } 62 63 off64_t ZipEntry::GetDataOffset() { 64 // All we have is the offset to the Local File Header, which is 65 // variable size, so we have to read the contents of the struct to 66 // figure out where the actual data starts. 67 68 // We also need to make sure that the lengths are not so large that 69 // somebody trying to map the compressed or uncompressed data runs 70 // off the end of the mapped region. 71 72 off64_t dir_offset = zip_archive_->dir_offset_; 73 int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset); 74 if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) { 75 LOG(WARNING) << "Zip: bad LFH offset in zip"; 76 return -1; 77 } 78 79 if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) { 80 PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset; 81 return -1; 82 } 83 84 uint8_t lfh_buf[ZipArchive::kLFHLen]; 85 ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf))); 86 if (actual != sizeof(lfh_buf)) { 87 LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset; 88 return -1; 89 } 90 91 if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) { 92 LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset; 93 return -1; 94 } 95 96 uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags); 97 if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) { 98 LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf; 99 return -1; 100 } 101 102 off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen 103 + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen) 104 + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen)); 105 if (data_offset >= dir_offset) { 106 LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip"; 107 return -1; 108 } 109 110 // check lengths 111 112 if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) { 113 LOG(WARNING) << "Zip: bad compressed length in zip " 114 << "(" << data_offset << " + " << GetCompressedLength() 115 << " > " << dir_offset << ")"; 116 return -1; 117 } 118 119 if (GetCompressionMethod() == kCompressStored 120 && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) { 121 LOG(WARNING) << "Zip: bad uncompressed length in zip " 122 << "(" << data_offset << " + " << GetUncompressedLength() 123 << " > " << dir_offset << ")"; 124 return -1; 125 } 126 127 return data_offset; 128 } 129 130 static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) { 131 uint8_t* dst = begin; 132 std::vector<uint8_t> buf(kBufSize); 133 while (count != 0) { 134 size_t bytes_to_read = (count > kBufSize) ? kBufSize : count; 135 ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read)); 136 if (actual != static_cast<ssize_t>(bytes_to_read)) { 137 PLOG(WARNING) << "Zip: short read"; 138 return false; 139 } 140 memcpy(dst, &buf[0], bytes_to_read); 141 dst += bytes_to_read; 142 count -= bytes_to_read; 143 } 144 DCHECK_EQ(dst, begin + size); 145 return true; 146 } 147 148 class ZStream { 149 public: 150 ZStream(byte* write_buf, size_t write_buf_size) { 151 // Initialize the zlib stream struct. 152 memset(&zstream_, 0, sizeof(zstream_)); 153 zstream_.zalloc = Z_NULL; 154 zstream_.zfree = Z_NULL; 155 zstream_.opaque = Z_NULL; 156 zstream_.next_in = NULL; 157 zstream_.avail_in = 0; 158 zstream_.next_out = reinterpret_cast<Bytef*>(write_buf); 159 zstream_.avail_out = write_buf_size; 160 zstream_.data_type = Z_UNKNOWN; 161 } 162 163 z_stream& Get() { 164 return zstream_; 165 } 166 167 ~ZStream() { 168 inflateEnd(&zstream_); 169 } 170 private: 171 z_stream zstream_; 172 }; 173 174 static bool InflateToMemory(uint8_t* begin, size_t size, 175 int in, size_t uncompressed_length, size_t compressed_length) { 176 uint8_t* dst = begin; 177 UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]); 178 UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]); 179 if (read_buf.get() == NULL || write_buf.get() == NULL) { 180 LOG(WARNING) << "Zip: failed to allocate buffer to inflate"; 181 return false; 182 } 183 184 UniquePtr<ZStream> zstream(new ZStream(write_buf.get(), kBufSize)); 185 186 // Use the undocumented "negative window bits" feature to tell zlib 187 // that there's no zlib header waiting for it. 188 int zerr = inflateInit2(&zstream->Get(), -MAX_WBITS); 189 if (zerr != Z_OK) { 190 if (zerr == Z_VERSION_ERROR) { 191 LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")"; 192 } else { 193 LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")"; 194 } 195 return false; 196 } 197 198 size_t remaining = compressed_length; 199 do { 200 // read as much as we can 201 if (zstream->Get().avail_in == 0) { 202 size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining; 203 204 ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read)); 205 if (actual != static_cast<ssize_t>(bytes_to_read)) { 206 LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")"; 207 return false; 208 } 209 remaining -= bytes_to_read; 210 zstream->Get().next_in = read_buf.get(); 211 zstream->Get().avail_in = bytes_to_read; 212 } 213 214 // uncompress the data 215 zerr = inflate(&zstream->Get(), Z_NO_FLUSH); 216 if (zerr != Z_OK && zerr != Z_STREAM_END) { 217 LOG(WARNING) << "Zip: inflate zerr=" << zerr 218 << " (next_in=" << zstream->Get().next_in 219 << " avail_in=" << zstream->Get().avail_in 220 << " next_out=" << zstream->Get().next_out 221 << " avail_out=" << zstream->Get().avail_out 222 << ")"; 223 return false; 224 } 225 226 // write when we're full or when we're done 227 if (zstream->Get().avail_out == 0 || 228 (zerr == Z_STREAM_END && zstream->Get().avail_out != kBufSize)) { 229 size_t bytes_to_write = zstream->Get().next_out - write_buf.get(); 230 memcpy(dst, write_buf.get(), bytes_to_write); 231 dst += bytes_to_write; 232 zstream->Get().next_out = write_buf.get(); 233 zstream->Get().avail_out = kBufSize; 234 } 235 } while (zerr == Z_OK); 236 237 DCHECK_EQ(zerr, Z_STREAM_END); // other errors should've been caught 238 239 // paranoia 240 if (zstream->Get().total_out != uncompressed_length) { 241 LOG(WARNING) << "Zip: size mismatch on inflated file (" 242 << zstream->Get().total_out << " vs " << uncompressed_length << ")"; 243 return false; 244 } 245 246 DCHECK_EQ(dst, begin + size); 247 return true; 248 } 249 250 bool ZipEntry::ExtractToFile(File& file) { 251 uint32_t length = GetUncompressedLength(); 252 int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length)); 253 if (result == -1) { 254 PLOG(WARNING) << "Zip: failed to ftruncate " << file.GetPath() << " to length " << length; 255 return false; 256 } 257 258 UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0)); 259 if (map.get() == NULL) { 260 LOG(WARNING) << "Zip: failed to mmap space for " << file.GetPath(); 261 return false; 262 } 263 264 return ExtractToMemory(map->Begin(), map->Size()); 265 } 266 267 bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size) { 268 // If size is zero, data offset will be meaningless, so bail out early. 269 if (size == 0) { 270 return true; 271 } 272 off64_t data_offset = GetDataOffset(); 273 if (data_offset == -1) { 274 LOG(WARNING) << "Zip: data_offset=" << data_offset; 275 return false; 276 } 277 if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) { 278 PLOG(WARNING) << "Zip: lseek to data at " << data_offset << " failed"; 279 return false; 280 } 281 282 // TODO: this doesn't verify the data's CRC, but probably should (especially 283 // for uncompressed data). 284 switch (GetCompressionMethod()) { 285 case kCompressStored: 286 return CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength()); 287 case kCompressDeflated: 288 return InflateToMemory(begin, size, zip_archive_->fd_, 289 GetUncompressedLength(), GetCompressedLength()); 290 default: 291 LOG(WARNING) << "Zip: unknown compression method " << std::hex << GetCompressionMethod(); 292 return false; 293 } 294 } 295 296 MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename) { 297 std::string name(entry_filename); 298 name += " extracted in memory from "; 299 name += entry_filename; 300 UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(), 301 NULL, 302 GetUncompressedLength(), 303 PROT_READ | PROT_WRITE)); 304 if (map.get() == NULL) { 305 LOG(ERROR) << "Zip: mmap for '" << entry_filename << "' failed"; 306 return NULL; 307 } 308 309 bool success = ExtractToMemory(map->Begin(), map->Size()); 310 if (!success) { 311 LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory"; 312 return NULL; 313 } 314 315 return map.release(); 316 } 317 318 static void SetCloseOnExec(int fd) { 319 // This dance is more portable than Linux's O_CLOEXEC open(2) flag. 320 int flags = fcntl(fd, F_GETFD); 321 if (flags == -1) { 322 PLOG(WARNING) << "fcntl(" << fd << ", F_GETFD) failed"; 323 return; 324 } 325 int rc = fcntl(fd, F_SETFD, flags | FD_CLOEXEC); 326 if (rc == -1) { 327 PLOG(WARNING) << "fcntl(" << fd << ", F_SETFD, " << flags << ") failed"; 328 return; 329 } 330 } 331 332 ZipArchive* ZipArchive::Open(const std::string& filename) { 333 DCHECK(!filename.empty()); 334 int fd = open(filename.c_str(), O_RDONLY, 0); 335 if (fd == -1) { 336 PLOG(WARNING) << "Unable to open '" << filename << "'"; 337 return NULL; 338 } 339 return OpenFromFd(fd); 340 } 341 342 ZipArchive* ZipArchive::OpenFromFd(int fd) { 343 SetCloseOnExec(fd); 344 UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd)); 345 if (zip_archive.get() == NULL) { 346 return NULL; 347 } 348 if (!zip_archive->MapCentralDirectory()) { 349 zip_archive->Close(); 350 return NULL; 351 } 352 if (!zip_archive->Parse()) { 353 zip_archive->Close(); 354 return NULL; 355 } 356 return zip_archive.release(); 357 } 358 359 ZipEntry* ZipArchive::Find(const char* name) const { 360 DCHECK(name != NULL); 361 DirEntries::const_iterator it = dir_entries_.find(name); 362 if (it == dir_entries_.end()) { 363 return NULL; 364 } 365 return new ZipEntry(this, (*it).second); 366 } 367 368 void ZipArchive::Close() { 369 if (fd_ != -1) { 370 close(fd_); 371 } 372 fd_ = -1; 373 num_entries_ = 0; 374 dir_offset_ = 0; 375 } 376 377 // Find the zip Central Directory and memory-map it. 378 // 379 // On success, returns true after populating fields from the EOCD area: 380 // num_entries_ 381 // dir_offset_ 382 // dir_map_ 383 bool ZipArchive::MapCentralDirectory() { 384 /* 385 * Get and test file length. 386 */ 387 off64_t file_length = lseek64(fd_, 0, SEEK_END); 388 if (file_length < kEOCDLen) { 389 LOG(WARNING) << "Zip: length " << file_length << " is too small to be zip"; 390 return false; 391 } 392 393 size_t read_amount = kMaxEOCDSearch; 394 if (file_length < off64_t(read_amount)) { 395 read_amount = file_length; 396 } 397 398 UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]); 399 if (scan_buf.get() == NULL) { 400 return false; 401 } 402 403 /* 404 * Make sure this is a Zip archive. 405 */ 406 if (lseek64(fd_, 0, SEEK_SET) != 0) { 407 PLOG(WARNING) << "seek to start failed: "; 408 return false; 409 } 410 411 ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t))); 412 if (actual != static_cast<ssize_t>(sizeof(int32_t))) { 413 PLOG(INFO) << "couldn't read first signature from zip archive: "; 414 return false; 415 } 416 417 unsigned int header = Le32ToHost(scan_buf.get()); 418 if (header != kLFHSignature) { 419 LOG(VERBOSE) << "Not a Zip archive (found " << std::hex << header << ")"; 420 return false; 421 } 422 423 // Perform the traditional EOCD snipe hunt. 424 // 425 // We're searching for the End of Central Directory magic number, 426 // which appears at the start of the EOCD block. It's followed by 427 // 18 bytes of EOCD stuff and up to 64KB of archive comment. We 428 // need to read the last part of the file into a buffer, dig through 429 // it to find the magic number, parse some values out, and use those 430 // to determine the extent of the CD. 431 // 432 // We start by pulling in the last part of the file. 433 off64_t search_start = file_length - read_amount; 434 435 if (lseek64(fd_, search_start, SEEK_SET) != search_start) { 436 PLOG(WARNING) << "Zip: seek " << search_start << " failed"; 437 return false; 438 } 439 actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount)); 440 if (actual != static_cast<ssize_t>(read_amount)) { 441 PLOG(WARNING) << "Zip: read " << actual << ", expected " << read_amount << ". failed"; 442 return false; 443 } 444 445 446 // Scan backward for the EOCD magic. In an archive without a trailing 447 // comment, we'll find it on the first try. (We may want to consider 448 // doing an initial minimal read; if we don't find it, retry with a 449 // second read as above.) 450 int i; 451 for (i = read_amount - kEOCDLen; i >= 0; i--) { 452 if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) { 453 break; 454 } 455 } 456 if (i < 0) { 457 LOG(WARNING) << "Zip: EOCD not found, not a zip file"; 458 return false; 459 } 460 461 off64_t eocd_offset = search_start + i; 462 const byte* eocd_ptr = scan_buf.get() + i; 463 464 DCHECK(eocd_offset < file_length); 465 466 // Grab the CD offset and size, and the number of entries in the 467 // archive. Verify that they look reasonable. 468 uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber); 469 uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD); 470 uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries); 471 uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries); 472 uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize); 473 uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset); 474 uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize); 475 476 if ((uint64_t) dir_offset + (uint64_t) dir_size > (uint64_t) eocd_offset) { 477 LOG(WARNING) << "Zip: bad offsets (" 478 << "dir=" << dir_offset << ", " 479 << "size=" << dir_size << ", " 480 << "eocd=" << eocd_offset << ")"; 481 return false; 482 } 483 if (num_entries == 0) { 484 LOG(WARNING) << "Zip: empty archive?"; 485 return false; 486 } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) { 487 LOG(WARNING) << "spanned archives not supported"; 488 return false; 489 } 490 491 // Check to see if comment is a sane size 492 if ((comment_size > (file_length - kEOCDLen)) 493 || (eocd_offset > (file_length - kEOCDLen) - comment_size)) { 494 LOG(WARNING) << "comment size runs off end of file"; 495 return false; 496 } 497 498 // It all looks good. Create a mapping for the CD. 499 dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset)); 500 if (dir_map_.get() == NULL) { 501 return false; 502 } 503 504 num_entries_ = num_entries; 505 dir_offset_ = dir_offset; 506 return true; 507 } 508 509 bool ZipArchive::Parse() { 510 const byte* cd_ptr = dir_map_->Begin(); 511 size_t cd_length = dir_map_->Size(); 512 513 // Walk through the central directory, adding entries to the hash 514 // table and verifying values. 515 const byte* ptr = cd_ptr; 516 for (int i = 0; i < num_entries_; i++) { 517 if (Le32ToHost(ptr) != kCDESignature) { 518 LOG(WARNING) << "Zip: missed a central dir sig (at " << i << ")"; 519 return false; 520 } 521 if (ptr + kCDELen > cd_ptr + cd_length) { 522 LOG(WARNING) << "Zip: ran off the end (at " << i << ")"; 523 return false; 524 } 525 526 int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset); 527 if (local_hdr_offset >= dir_offset_) { 528 LOG(WARNING) << "Zip: bad LFH offset " << local_hdr_offset << " at entry " << i; 529 return false; 530 } 531 532 uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags); 533 if ((gpbf & kGPFUnsupportedMask) != 0) { 534 LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf; 535 return false; 536 } 537 538 uint16_t name_len = Le16ToHost(ptr + kCDENameLen); 539 uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen); 540 uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen); 541 542 // add the CDE filename to the hash table 543 const char* name = reinterpret_cast<const char*>(ptr + kCDELen); 544 545 // Check name for NULL characters 546 if (memchr(name, 0, name_len) != NULL) { 547 LOG(WARNING) << "Filename contains NUL byte"; 548 return false; 549 } 550 551 dir_entries_.Put(StringPiece(name, name_len), ptr); 552 ptr += kCDELen + name_len + extra_len + comment_len; 553 if (ptr > cd_ptr + cd_length) { 554 LOG(WARNING) << "Zip: bad CD advance " 555 << "(" << ptr << " vs " << (cd_ptr + cd_length) << ") " 556 << "at entry " << i; 557 return false; 558 } 559 } 560 return true; 561 } 562 563 } // namespace art 564