Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "zip_archive.h"
     18 
     19 #include <vector>
     20 
     21 #include <fcntl.h>
     22 #include <sys/stat.h>
     23 #include <sys/types.h>
     24 #include <unistd.h>
     25 
     26 #include "base/unix_file/fd_file.h"
     27 #include "UniquePtr.h"
     28 
     29 namespace art {
     30 
// Chunk size (32 * KB) used for the read and scratch buffers when entry data
// is copied or inflated out of the archive.
static const size_t kBufSize = 32 * KB;
     32 
     33 // Get 2 little-endian bytes.
     34 static uint32_t Le16ToHost(const byte* src) {
     35   return ((src[0] <<  0) |
     36           (src[1] <<  8));
     37 }
     38 
     39 // Get 4 little-endian bytes.
     40 static uint32_t Le32ToHost(const byte* src) {
     41   return ((src[0] <<  0) |
     42           (src[1] <<  8) |
     43           (src[2] << 16) |
     44           (src[3] << 24));
     45 }
     46 
     47 uint16_t ZipEntry::GetCompressionMethod() {
     48   return Le16ToHost(ptr_ + ZipArchive::kCDEMethod);
     49 }
     50 
     51 uint32_t ZipEntry::GetCompressedLength() {
     52   return Le32ToHost(ptr_ + ZipArchive::kCDECompLen);
     53 }
     54 
     55 uint32_t ZipEntry::GetUncompressedLength() {
     56   return Le32ToHost(ptr_ + ZipArchive::kCDEUncompLen);
     57 }
     58 
     59 uint32_t ZipEntry::GetCrc32() {
     60   return Le32ToHost(ptr_ + ZipArchive::kCDECRC);
     61 }
     62 
     63 off64_t ZipEntry::GetDataOffset() {
     64   // All we have is the offset to the Local File Header, which is
     65   // variable size, so we have to read the contents of the struct to
     66   // figure out where the actual data starts.
     67 
     68   // We also need to make sure that the lengths are not so large that
     69   // somebody trying to map the compressed or uncompressed data runs
     70   // off the end of the mapped region.
     71 
     72   off64_t dir_offset = zip_archive_->dir_offset_;
     73   int64_t lfh_offset = Le32ToHost(ptr_ + ZipArchive::kCDELocalOffset);
     74   if (lfh_offset + ZipArchive::kLFHLen >= dir_offset) {
     75     LOG(WARNING) << "Zip: bad LFH offset in zip";
     76     return -1;
     77   }
     78 
     79   if (lseek64(zip_archive_->fd_, lfh_offset, SEEK_SET) != lfh_offset) {
     80     PLOG(WARNING) << "Zip: failed seeking to LFH at offset " << lfh_offset;
     81     return -1;
     82   }
     83 
     84   uint8_t lfh_buf[ZipArchive::kLFHLen];
     85   ssize_t actual = TEMP_FAILURE_RETRY(read(zip_archive_->fd_, lfh_buf, sizeof(lfh_buf)));
     86   if (actual != sizeof(lfh_buf)) {
     87     LOG(WARNING) << "Zip: failed reading LFH from offset " << lfh_offset;
     88     return -1;
     89   }
     90 
     91   if (Le32ToHost(lfh_buf) != ZipArchive::kLFHSignature) {
     92     LOG(WARNING) << "Zip: didn't find signature at start of LFH, offset " << lfh_offset;
     93     return -1;
     94   }
     95 
     96   uint32_t gpbf = Le16ToHost(lfh_buf + ZipArchive::kLFHGPBFlags);
     97   if ((gpbf & ZipArchive::kGPFUnsupportedMask) != 0) {
     98     LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf;
     99     return -1;
    100   }
    101 
    102   off64_t data_offset = (lfh_offset + ZipArchive::kLFHLen
    103                        + Le16ToHost(lfh_buf + ZipArchive::kLFHNameLen)
    104                        + Le16ToHost(lfh_buf + ZipArchive::kLFHExtraLen));
    105   if (data_offset >= dir_offset) {
    106     LOG(WARNING) << "Zip: bad data offset " << data_offset << " in zip";
    107     return -1;
    108   }
    109 
    110   // check lengths
    111 
    112   if (static_cast<off64_t>(data_offset + GetCompressedLength()) > dir_offset) {
    113     LOG(WARNING) << "Zip: bad compressed length in zip "
    114                  << "(" << data_offset << " + " << GetCompressedLength()
    115                  << " > " << dir_offset << ")";
    116     return -1;
    117   }
    118 
    119   if (GetCompressionMethod() == kCompressStored
    120       && static_cast<off64_t>(data_offset + GetUncompressedLength()) > dir_offset) {
    121     LOG(WARNING) << "Zip: bad uncompressed length in zip "
    122                  << "(" << data_offset << " + " << GetUncompressedLength()
    123                  << " > " << dir_offset << ")";
    124     return -1;
    125   }
    126 
    127   return data_offset;
    128 }
    129 
    130 static bool CopyFdToMemory(uint8_t* begin, size_t size, int in, size_t count) {
    131   uint8_t* dst = begin;
    132   std::vector<uint8_t> buf(kBufSize);
    133   while (count != 0) {
    134     size_t bytes_to_read = (count > kBufSize) ? kBufSize : count;
    135     ssize_t actual = TEMP_FAILURE_RETRY(read(in, &buf[0], bytes_to_read));
    136     if (actual != static_cast<ssize_t>(bytes_to_read)) {
    137       PLOG(WARNING) << "Zip: short read";
    138       return false;
    139     }
    140     memcpy(dst, &buf[0], bytes_to_read);
    141     dst += bytes_to_read;
    142     count -= bytes_to_read;
    143   }
    144   DCHECK_EQ(dst, begin + size);
    145   return true;
    146 }
    147 
    148 class ZStream {
    149  public:
    150   ZStream(byte* write_buf, size_t write_buf_size) {
    151     // Initialize the zlib stream struct.
    152     memset(&zstream_, 0, sizeof(zstream_));
    153     zstream_.zalloc = Z_NULL;
    154     zstream_.zfree = Z_NULL;
    155     zstream_.opaque = Z_NULL;
    156     zstream_.next_in = NULL;
    157     zstream_.avail_in = 0;
    158     zstream_.next_out = reinterpret_cast<Bytef*>(write_buf);
    159     zstream_.avail_out = write_buf_size;
    160     zstream_.data_type = Z_UNKNOWN;
    161   }
    162 
    163   z_stream& Get() {
    164     return zstream_;
    165   }
    166 
    167   ~ZStream() {
    168     inflateEnd(&zstream_);
    169   }
    170  private:
    171   z_stream zstream_;
    172 };
    173 
    174 static bool InflateToMemory(uint8_t* begin, size_t size,
    175                             int in, size_t uncompressed_length, size_t compressed_length) {
    176   uint8_t* dst = begin;
    177   UniquePtr<uint8_t[]> read_buf(new uint8_t[kBufSize]);
    178   UniquePtr<uint8_t[]> write_buf(new uint8_t[kBufSize]);
    179   if (read_buf.get() == NULL || write_buf.get() == NULL) {
    180     LOG(WARNING) << "Zip: failed to allocate buffer to inflate";
    181     return false;
    182   }
    183 
    184   UniquePtr<ZStream> zstream(new ZStream(write_buf.get(), kBufSize));
    185 
    186   // Use the undocumented "negative window bits" feature to tell zlib
    187   // that there's no zlib header waiting for it.
    188   int zerr = inflateInit2(&zstream->Get(), -MAX_WBITS);
    189   if (zerr != Z_OK) {
    190     if (zerr == Z_VERSION_ERROR) {
    191       LOG(ERROR) << "Installed zlib is not compatible with linked version (" << ZLIB_VERSION << ")";
    192     } else {
    193       LOG(WARNING) << "Call to inflateInit2 failed (zerr=" << zerr << ")";
    194     }
    195     return false;
    196   }
    197 
    198   size_t remaining = compressed_length;
    199   do {
    200     // read as much as we can
    201     if (zstream->Get().avail_in == 0) {
    202       size_t bytes_to_read = (remaining > kBufSize) ? kBufSize : remaining;
    203 
    204         ssize_t actual = TEMP_FAILURE_RETRY(read(in, read_buf.get(), bytes_to_read));
    205         if (actual != static_cast<ssize_t>(bytes_to_read)) {
    206           LOG(WARNING) << "Zip: inflate read failed (" << actual << " vs " << bytes_to_read << ")";
    207           return false;
    208         }
    209         remaining -= bytes_to_read;
    210         zstream->Get().next_in = read_buf.get();
    211         zstream->Get().avail_in = bytes_to_read;
    212     }
    213 
    214     // uncompress the data
    215     zerr = inflate(&zstream->Get(), Z_NO_FLUSH);
    216     if (zerr != Z_OK && zerr != Z_STREAM_END) {
    217       LOG(WARNING) << "Zip: inflate zerr=" << zerr
    218                    << " (next_in=" << zstream->Get().next_in
    219                    << " avail_in=" << zstream->Get().avail_in
    220                    << " next_out=" << zstream->Get().next_out
    221                    << " avail_out=" << zstream->Get().avail_out
    222                    << ")";
    223       return false;
    224     }
    225 
    226     // write when we're full or when we're done
    227     if (zstream->Get().avail_out == 0 ||
    228         (zerr == Z_STREAM_END && zstream->Get().avail_out != kBufSize)) {
    229       size_t bytes_to_write = zstream->Get().next_out - write_buf.get();
    230       memcpy(dst, write_buf.get(), bytes_to_write);
    231       dst += bytes_to_write;
    232       zstream->Get().next_out = write_buf.get();
    233       zstream->Get().avail_out = kBufSize;
    234     }
    235   } while (zerr == Z_OK);
    236 
    237   DCHECK_EQ(zerr, Z_STREAM_END);  // other errors should've been caught
    238 
    239   // paranoia
    240   if (zstream->Get().total_out != uncompressed_length) {
    241     LOG(WARNING) << "Zip: size mismatch on inflated file ("
    242                  << zstream->Get().total_out << " vs " << uncompressed_length << ")";
    243     return false;
    244   }
    245 
    246   DCHECK_EQ(dst, begin + size);
    247   return true;
    248 }
    249 
    250 bool ZipEntry::ExtractToFile(File& file) {
    251   uint32_t length = GetUncompressedLength();
    252   int result = TEMP_FAILURE_RETRY(ftruncate(file.Fd(), length));
    253   if (result == -1) {
    254     PLOG(WARNING) << "Zip: failed to ftruncate " << file.GetPath() << " to length " << length;
    255     return false;
    256   }
    257 
    258   UniquePtr<MemMap> map(MemMap::MapFile(length, PROT_READ | PROT_WRITE, MAP_SHARED, file.Fd(), 0));
    259   if (map.get() == NULL) {
    260     LOG(WARNING) << "Zip: failed to mmap space for " << file.GetPath();
    261     return false;
    262   }
    263 
    264   return ExtractToMemory(map->Begin(), map->Size());
    265 }
    266 
    267 bool ZipEntry::ExtractToMemory(uint8_t* begin, size_t size) {
    268   // If size is zero, data offset will be meaningless, so bail out early.
    269   if (size == 0) {
    270     return true;
    271   }
    272   off64_t data_offset = GetDataOffset();
    273   if (data_offset == -1) {
    274     LOG(WARNING) << "Zip: data_offset=" << data_offset;
    275     return false;
    276   }
    277   if (lseek64(zip_archive_->fd_, data_offset, SEEK_SET) != data_offset) {
    278     PLOG(WARNING) << "Zip: lseek to data at " << data_offset << " failed";
    279     return false;
    280   }
    281 
    282   // TODO: this doesn't verify the data's CRC, but probably should (especially
    283   // for uncompressed data).
    284   switch (GetCompressionMethod()) {
    285     case kCompressStored:
    286       return CopyFdToMemory(begin, size, zip_archive_->fd_, GetUncompressedLength());
    287     case kCompressDeflated:
    288       return InflateToMemory(begin, size, zip_archive_->fd_,
    289                              GetUncompressedLength(), GetCompressedLength());
    290     default:
    291       LOG(WARNING) << "Zip: unknown compression method " << std::hex << GetCompressionMethod();
    292       return false;
    293   }
    294 }
    295 
    296 MemMap* ZipEntry::ExtractToMemMap(const char* entry_filename) {
    297   std::string name(entry_filename);
    298   name += " extracted in memory from ";
    299   name += entry_filename;
    300   UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
    301                                              NULL,
    302                                              GetUncompressedLength(),
    303                                              PROT_READ | PROT_WRITE));
    304   if (map.get() == NULL) {
    305     LOG(ERROR) << "Zip: mmap for '" << entry_filename << "' failed";
    306     return NULL;
    307   }
    308 
    309   bool success = ExtractToMemory(map->Begin(), map->Size());
    310   if (!success) {
    311     LOG(ERROR) << "Zip: Failed to extract '" << entry_filename << "' to memory";
    312     return NULL;
    313   }
    314 
    315   return map.release();
    316 }
    317 
    318 static void SetCloseOnExec(int fd) {
    319   // This dance is more portable than Linux's O_CLOEXEC open(2) flag.
    320   int flags = fcntl(fd, F_GETFD);
    321   if (flags == -1) {
    322     PLOG(WARNING) << "fcntl(" << fd << ", F_GETFD) failed";
    323     return;
    324   }
    325   int rc = fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
    326   if (rc == -1) {
    327     PLOG(WARNING) << "fcntl(" << fd << ", F_SETFD, " << flags << ") failed";
    328     return;
    329   }
    330 }
    331 
    332 ZipArchive* ZipArchive::Open(const std::string& filename) {
    333   DCHECK(!filename.empty());
    334   int fd = open(filename.c_str(), O_RDONLY, 0);
    335   if (fd == -1) {
    336     PLOG(WARNING) << "Unable to open '" << filename << "'";
    337     return NULL;
    338   }
    339   return OpenFromFd(fd);
    340 }
    341 
    342 ZipArchive* ZipArchive::OpenFromFd(int fd) {
    343   SetCloseOnExec(fd);
    344   UniquePtr<ZipArchive> zip_archive(new ZipArchive(fd));
    345   if (zip_archive.get() == NULL) {
    346       return NULL;
    347   }
    348   if (!zip_archive->MapCentralDirectory()) {
    349       zip_archive->Close();
    350       return NULL;
    351   }
    352   if (!zip_archive->Parse()) {
    353       zip_archive->Close();
    354       return NULL;
    355   }
    356   return zip_archive.release();
    357 }
    358 
    359 ZipEntry* ZipArchive::Find(const char* name) const {
    360   DCHECK(name != NULL);
    361   DirEntries::const_iterator it = dir_entries_.find(name);
    362   if (it == dir_entries_.end()) {
    363     return NULL;
    364   }
    365   return new ZipEntry(this, (*it).second);
    366 }
    367 
    368 void ZipArchive::Close() {
    369   if (fd_ != -1) {
    370     close(fd_);
    371   }
    372   fd_ = -1;
    373   num_entries_ = 0;
    374   dir_offset_ = 0;
    375 }
    376 
    377 // Find the zip Central Directory and memory-map it.
    378 //
    379 // On success, returns true after populating fields from the EOCD area:
    380 //   num_entries_
    381 //   dir_offset_
    382 //   dir_map_
    383 bool ZipArchive::MapCentralDirectory() {
    384   /*
    385    * Get and test file length.
    386    */
    387   off64_t file_length = lseek64(fd_, 0, SEEK_END);
    388   if (file_length < kEOCDLen) {
    389     LOG(WARNING) << "Zip: length " << file_length << " is too small to be zip";
    390     return false;
    391   }
    392 
    393   size_t read_amount = kMaxEOCDSearch;
    394   if (file_length < off64_t(read_amount)) {
    395     read_amount = file_length;
    396   }
    397 
    398   UniquePtr<uint8_t[]> scan_buf(new uint8_t[read_amount]);
    399   if (scan_buf.get() == NULL) {
    400     return false;
    401   }
    402 
    403   /*
    404    * Make sure this is a Zip archive.
    405    */
    406   if (lseek64(fd_, 0, SEEK_SET) != 0) {
    407     PLOG(WARNING) << "seek to start failed: ";
    408     return false;
    409   }
    410 
    411   ssize_t actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), sizeof(int32_t)));
    412   if (actual != static_cast<ssize_t>(sizeof(int32_t))) {
    413     PLOG(INFO) << "couldn't read first signature from zip archive: ";
    414     return false;
    415   }
    416 
    417   unsigned int header = Le32ToHost(scan_buf.get());
    418   if (header != kLFHSignature) {
    419     LOG(VERBOSE) << "Not a Zip archive (found " << std::hex << header << ")";
    420     return false;
    421   }
    422 
    423   // Perform the traditional EOCD snipe hunt.
    424   //
    425   // We're searching for the End of Central Directory magic number,
    426   // which appears at the start of the EOCD block.  It's followed by
    427   // 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
    428   // need to read the last part of the file into a buffer, dig through
    429   // it to find the magic number, parse some values out, and use those
    430   // to determine the extent of the CD.
    431   //
    432   // We start by pulling in the last part of the file.
    433   off64_t search_start = file_length - read_amount;
    434 
    435   if (lseek64(fd_, search_start, SEEK_SET) != search_start) {
    436     PLOG(WARNING) << "Zip: seek " << search_start << " failed";
    437     return false;
    438   }
    439   actual = TEMP_FAILURE_RETRY(read(fd_, scan_buf.get(), read_amount));
    440   if (actual != static_cast<ssize_t>(read_amount)) {
    441     PLOG(WARNING) << "Zip: read " << actual << ", expected " << read_amount << ". failed";
    442     return false;
    443   }
    444 
    445 
    446   // Scan backward for the EOCD magic.  In an archive without a trailing
    447   // comment, we'll find it on the first try.  (We may want to consider
    448   // doing an initial minimal read; if we don't find it, retry with a
    449   // second read as above.)
    450   int i;
    451   for (i = read_amount - kEOCDLen; i >= 0; i--) {
    452     if (scan_buf.get()[i] == 0x50 && Le32ToHost(&(scan_buf.get())[i]) == kEOCDSignature) {
    453       break;
    454     }
    455   }
    456   if (i < 0) {
    457     LOG(WARNING) << "Zip: EOCD not found, not a zip file";
    458     return false;
    459   }
    460 
    461   off64_t eocd_offset = search_start + i;
    462   const byte* eocd_ptr = scan_buf.get() + i;
    463 
    464   DCHECK(eocd_offset < file_length);
    465 
    466   // Grab the CD offset and size, and the number of entries in the
    467   // archive.  Verify that they look reasonable.
    468   uint16_t disk_number = Le16ToHost(eocd_ptr + kEOCDDiskNumber);
    469   uint16_t disk_with_central_dir = Le16ToHost(eocd_ptr + kEOCDDiskNumberForCD);
    470   uint16_t num_entries = Le16ToHost(eocd_ptr + kEOCDNumEntries);
    471   uint16_t total_num_entries = Le16ToHost(eocd_ptr + kEOCDTotalNumEntries);
    472   uint32_t dir_size = Le32ToHost(eocd_ptr + kEOCDSize);
    473   uint32_t dir_offset = Le32ToHost(eocd_ptr + kEOCDFileOffset);
    474   uint16_t comment_size = Le16ToHost(eocd_ptr + kEOCDCommentSize);
    475 
    476   if ((uint64_t) dir_offset + (uint64_t) dir_size > (uint64_t) eocd_offset) {
    477     LOG(WARNING) << "Zip: bad offsets ("
    478                  << "dir=" << dir_offset << ", "
    479                  << "size=" << dir_size  << ", "
    480                  << "eocd=" << eocd_offset << ")";
    481     return false;
    482   }
    483   if (num_entries == 0) {
    484     LOG(WARNING) << "Zip: empty archive?";
    485     return false;
    486   } else if (num_entries != total_num_entries || disk_number != 0 || disk_with_central_dir != 0) {
    487     LOG(WARNING) << "spanned archives not supported";
    488     return false;
    489   }
    490 
    491   // Check to see if comment is a sane size
    492   if ((comment_size > (file_length - kEOCDLen))
    493       || (eocd_offset > (file_length - kEOCDLen) - comment_size)) {
    494     LOG(WARNING) << "comment size runs off end of file";
    495     return false;
    496   }
    497 
    498   // It all looks good.  Create a mapping for the CD.
    499   dir_map_.reset(MemMap::MapFile(dir_size, PROT_READ, MAP_SHARED, fd_, dir_offset));
    500   if (dir_map_.get() == NULL) {
    501     return false;
    502   }
    503 
    504   num_entries_ = num_entries;
    505   dir_offset_ = dir_offset;
    506   return true;
    507 }
    508 
    509 bool ZipArchive::Parse() {
    510   const byte* cd_ptr = dir_map_->Begin();
    511   size_t cd_length = dir_map_->Size();
    512 
    513   // Walk through the central directory, adding entries to the hash
    514   // table and verifying values.
    515   const byte* ptr = cd_ptr;
    516   for (int i = 0; i < num_entries_; i++) {
    517     if (Le32ToHost(ptr) != kCDESignature) {
    518       LOG(WARNING) << "Zip: missed a central dir sig (at " << i << ")";
    519       return false;
    520     }
    521     if (ptr + kCDELen > cd_ptr + cd_length) {
    522       LOG(WARNING) << "Zip: ran off the end (at " << i << ")";
    523       return false;
    524     }
    525 
    526     int64_t local_hdr_offset = Le32ToHost(ptr + kCDELocalOffset);
    527     if (local_hdr_offset >= dir_offset_) {
    528       LOG(WARNING) << "Zip: bad LFH offset " << local_hdr_offset << " at entry " << i;
    529       return false;
    530     }
    531 
    532     uint16_t gpbf = Le16ToHost(ptr + kCDEGPBFlags);
    533     if ((gpbf & kGPFUnsupportedMask) != 0) {
    534       LOG(WARNING) << "Invalid General Purpose Bit Flag: " << gpbf;
    535       return false;
    536     }
    537 
    538     uint16_t name_len = Le16ToHost(ptr + kCDENameLen);
    539     uint16_t extra_len = Le16ToHost(ptr + kCDEExtraLen);
    540     uint16_t comment_len = Le16ToHost(ptr + kCDECommentLen);
    541 
    542     // add the CDE filename to the hash table
    543     const char* name = reinterpret_cast<const char*>(ptr + kCDELen);
    544 
    545     // Check name for NULL characters
    546     if (memchr(name, 0, name_len) != NULL) {
    547       LOG(WARNING) << "Filename contains NUL byte";
    548       return false;
    549     }
    550 
    551     dir_entries_.Put(StringPiece(name, name_len), ptr);
    552     ptr += kCDELen + name_len + extra_len + comment_len;
    553     if (ptr > cd_ptr + cd_length) {
    554       LOG(WARNING) << "Zip: bad CD advance "
    555                    << "(" << ptr << " vs " << (cd_ptr + cd_length) << ") "
    556                    << "at entry " << i;
    557       return false;
    558     }
    559   }
    560   return true;
    561 }
    562 
    563 }  // namespace art
    564