Home | History | Annotate | Download | only in libziparchive
      1 /*
      2  * Copyright (C) 2008 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *    http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Read-only access to Zip archives, with minimal heap allocation.
     19  */
     20 
     21 #include <assert.h>
     22 #include <errno.h>
     23 #include <fcntl.h>
     24 #include <inttypes.h>
     25 #include <limits.h>
     26 #include <log/log.h>
     27 #include <stdlib.h>
     28 #include <string.h>
     29 #include <unistd.h>
     30 #include <utils/Compat.h>
     31 #include <utils/FileMap.h>
     32 #include <zlib.h>
     33 
     34 #include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
     35 
     36 #include "ziparchive/zip_archive.h"
     37 
     38 // This is for windows. If we don't open a file in binary mode, weird
     39 // things will happen.
     40 #ifndef O_BINARY
     41 #define O_BINARY 0
     42 #endif
     43 
     44 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
     45     TypeName(); \
     46     TypeName(const TypeName&); \
     47     void operator=(const TypeName&)
     48 
     49 // The "end of central directory" (EOCD) record. Each archive
     50 // contains exactly once such record which appears at the end of
     51 // the archive. It contains archive wide information like the
     52 // number of entries in the archive and the offset to the central
     53 // directory of the offset.
     54 struct EocdRecord {
     55   static const uint32_t kSignature = 0x06054b50;
     56 
     57   // End of central directory signature, should always be
     58   // |kSignature|.
     59   uint32_t eocd_signature;
     60   // The number of the current "disk", i.e, the "disk" that this
     61   // central directory is on.
     62   //
     63   // This implementation assumes that each archive spans a single
     64   // disk only. i.e, that disk_num == 1.
     65   uint16_t disk_num;
     66   // The disk where the central directory starts.
     67   //
     68   // This implementation assumes that each archive spans a single
     69   // disk only. i.e, that cd_start_disk == 1.
     70   uint16_t cd_start_disk;
     71   // The number of central directory records on this disk.
     72   //
     73   // This implementation assumes that each archive spans a single
     74   // disk only. i.e, that num_records_on_disk == num_records.
     75   uint16_t num_records_on_disk;
     76   // The total number of central directory records.
     77   uint16_t num_records;
     78   // The size of the central directory (in bytes).
     79   uint32_t cd_size;
     80   // The offset of the start of the central directory, relative
     81   // to the start of the file.
     82   uint32_t cd_start_offset;
     83   // Length of the central directory comment.
     84   uint16_t comment_length;
     85  private:
     86   DISALLOW_IMPLICIT_CONSTRUCTORS(EocdRecord);
     87 } __attribute__((packed));
     88 
     89 // A structure representing the fixed length fields for a single
     90 // record in the central directory of the archive. In addition to
     91 // the fixed length fields listed here, each central directory
     92 // record contains a variable length "file_name" and "extra_field"
     93 // whose lengths are given by |file_name_length| and |extra_field_length|
     94 // respectively.
     95 struct CentralDirectoryRecord {
     96   static const uint32_t kSignature = 0x02014b50;
     97 
     98   // The start of record signature. Must be |kSignature|.
     99   uint32_t record_signature;
    100   // Tool version. Ignored by this implementation.
    101   uint16_t version_made_by;
    102   // Tool version. Ignored by this implementation.
    103   uint16_t version_needed;
    104   // The "general purpose bit flags" for this entry. The only
    105   // flag value that we currently check for is the "data descriptor"
    106   // flag.
    107   uint16_t gpb_flags;
    108   // The compression method for this entry, one of |kCompressStored|
    109   // and |kCompressDeflated|.
    110   uint16_t compression_method;
    111   // The file modification time and date for this entry.
    112   uint16_t last_mod_time;
    113   uint16_t last_mod_date;
    114   // The CRC-32 checksum for this entry.
    115   uint32_t crc32;
    116   // The compressed size (in bytes) of this entry.
    117   uint32_t compressed_size;
    118   // The uncompressed size (in bytes) of this entry.
    119   uint32_t uncompressed_size;
    120   // The length of the entry file name in bytes. The file name
    121   // will appear immediately after this record.
    122   uint16_t file_name_length;
    123   // The length of the extra field info (in bytes). This data
    124   // will appear immediately after the entry file name.
    125   uint16_t extra_field_length;
    126   // The length of the entry comment (in bytes). This data will
    127   // appear immediately after the extra field.
    128   uint16_t comment_length;
    129   // The start disk for this entry. Ignored by this implementation).
    130   uint16_t file_start_disk;
    131   // File attributes. Ignored by this implementation.
    132   uint16_t internal_file_attributes;
    133   // File attributes. Ignored by this implementation.
    134   uint32_t external_file_attributes;
    135   // The offset to the local file header for this entry, from the
    136   // beginning of this archive.
    137   uint32_t local_file_header_offset;
    138  private:
    139   DISALLOW_IMPLICIT_CONSTRUCTORS(CentralDirectoryRecord);
    140 } __attribute__((packed));
    141 
    142 // The local file header for a given entry. This duplicates information
    143 // present in the central directory of the archive. It is an error for
    144 // the information here to be different from the central directory
    145 // information for a given entry.
    146 struct LocalFileHeader {
    147   static const uint32_t kSignature = 0x04034b50;
    148 
    149   // The local file header signature, must be |kSignature|.
    150   uint32_t lfh_signature;
    151   // Tool version. Ignored by this implementation.
    152   uint16_t version_needed;
    153   // The "general purpose bit flags" for this entry. The only
    154   // flag value that we currently check for is the "data descriptor"
    155   // flag.
    156   uint16_t gpb_flags;
    157   // The compression method for this entry, one of |kCompressStored|
    158   // and |kCompressDeflated|.
    159   uint16_t compression_method;
    160   // The file modification time and date for this entry.
    161   uint16_t last_mod_time;
    162   uint16_t last_mod_date;
    163   // The CRC-32 checksum for this entry.
    164   uint32_t crc32;
    165   // The compressed size (in bytes) of this entry.
    166   uint32_t compressed_size;
    167   // The uncompressed size (in bytes) of this entry.
    168   uint32_t uncompressed_size;
    169   // The length of the entry file name in bytes. The file name
    170   // will appear immediately after this record.
    171   uint16_t file_name_length;
    172   // The length of the extra field info (in bytes). This data
    173   // will appear immediately after the entry file name.
    174   uint16_t extra_field_length;
    175  private:
    176   DISALLOW_IMPLICIT_CONSTRUCTORS(LocalFileHeader);
    177 } __attribute__((packed));
    178 
    179 struct DataDescriptor {
    180   // The *optional* data descriptor start signature.
    181   static const uint32_t kOptSignature = 0x08074b50;
    182 
    183   // CRC-32 checksum of the entry.
    184   uint32_t crc32;
    185   // Compressed size of the entry.
    186   uint32_t compressed_size;
    187   // Uncompressed size of the entry.
    188   uint32_t uncompressed_size;
    189  private:
    190   DISALLOW_IMPLICIT_CONSTRUCTORS(DataDescriptor);
    191 } __attribute__((packed));
    192 
    193 #undef DISALLOW_IMPLICIT_CONSTRUCTORS
    194 
    195 static const uint32_t kGPBDDFlagMask = 0x0008;         // mask value that signifies that the entry has a DD
    196 static const uint32_t kMaxErrorLen = 1024;
    197 
    198 // The maximum size of a central directory or a file
    199 // comment in bytes.
    200 static const uint32_t kMaxCommentLen = 65535;
    201 
    202 // The maximum number of bytes to scan backwards for the EOCD start.
    203 static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
    204 
    205 static const char* kErrorMessages[] = {
    206   "Unknown return code.",
    207   "Iteration ended",
    208   "Zlib error",
    209   "Invalid file",
    210   "Invalid handle",
    211   "Duplicate entries in archive",
    212   "Empty archive",
    213   "Entry not found",
    214   "Invalid offset",
    215   "Inconsistent information",
    216   "Invalid entry name",
    217   "I/O Error",
    218   "File mapping failed"
    219 };
    220 
    221 static const int32_t kErrorMessageUpperBound = 0;
    222 
    223 static const int32_t kIterationEnd = -1;
    224 
    225 // We encountered a Zlib error when inflating a stream from this file.
    226 // Usually indicates file corruption.
    227 static const int32_t kZlibError = -2;
    228 
    229 // The input file cannot be processed as a zip archive. Usually because
    230 // it's too small, too large or does not have a valid signature.
    231 static const int32_t kInvalidFile = -3;
    232 
    233 // An invalid iteration / ziparchive handle was passed in as an input
    234 // argument.
    235 static const int32_t kInvalidHandle = -4;
    236 
    237 // The zip archive contained two (or possibly more) entries with the same
    238 // name.
    239 static const int32_t kDuplicateEntry = -5;
    240 
    241 // The zip archive contains no entries.
    242 static const int32_t kEmptyArchive = -6;
    243 
    244 // The specified entry was not found in the archive.
    245 static const int32_t kEntryNotFound = -7;
    246 
    247 // The zip archive contained an invalid local file header pointer.
    248 static const int32_t kInvalidOffset = -8;
    249 
    250 // The zip archive contained inconsistent entry information. This could
    251 // be because the central directory & local file header did not agree, or
    252 // if the actual uncompressed length or crc32 do not match their declared
    253 // values.
    254 static const int32_t kInconsistentInformation = -9;
    255 
    256 // An invalid entry name was encountered.
    257 static const int32_t kInvalidEntryName = -10;
    258 
    259 // An I/O related system call (read, lseek, ftruncate, map) failed.
    260 static const int32_t kIoError = -11;
    261 
    262 // We were not able to mmap the central directory or entry contents.
    263 static const int32_t kMmapFailed = -12;
    264 
    265 static const int32_t kErrorMessageLowerBound = -13;
    266 
    267 static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
    268 
    269 /*
    270  * A Read-only Zip archive.
    271  *
    272  * We want "open" and "find entry by name" to be fast operations, and
    273  * we want to use as little memory as possible.  We memory-map the zip
    274  * central directory, and load a hash table with pointers to the filenames
    275  * (which aren't null-terminated).  The other fields are at a fixed offset
    276  * from the filename, so we don't need to extract those (but we do need
    277  * to byte-read and endian-swap them every time we want them).
    278  *
    279  * It's possible that somebody has handed us a massive (~1GB) zip archive,
    280  * so we can't expect to mmap the entire file.
    281  *
    282  * To speed comparisons when doing a lookup by name, we could make the mapping
    283  * "private" (copy-on-write) and null-terminate the filenames after verifying
    284  * the record structure.  However, this requires a private mapping of
    285  * every page that the Central Directory touches.  Easier to tuck a copy
    286  * of the string length into the hash table entry.
    287  */
    288 struct ZipArchive {
    289   /* open Zip archive */
    290   const int fd;
    291 
    292   /* mapped central directory area */
    293   off64_t directory_offset;
    294   android::FileMap* directory_map;
    295 
    296   /* number of entries in the Zip archive */
    297   uint16_t num_entries;
    298 
    299   /*
    300    * We know how many entries are in the Zip archive, so we can have a
    301    * fixed-size hash table. We define a load factor of 0.75 and overallocat
    302    * so the maximum number entries can never be higher than
    303    * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
    304    */
    305   uint32_t hash_table_size;
    306   ZipEntryName* hash_table;
    307 
    308   ZipArchive(const int fd) :
    309       fd(fd),
    310       directory_offset(0),
    311       directory_map(NULL),
    312       num_entries(0),
    313       hash_table_size(0),
    314       hash_table(NULL) {}
    315 
    316   ~ZipArchive() {
    317     if (fd >= 0) {
    318       close(fd);
    319     }
    320 
    321     if (directory_map != NULL) {
    322       directory_map->release();
    323     }
    324     free(hash_table);
    325   }
    326 };
    327 
    328 // Returns 0 on success and negative values on failure.
    329 static android::FileMap* MapFileSegment(const int fd, const off64_t start,
    330                                         const size_t length, const bool read_only,
    331                                         const char* debug_file_name) {
    332   android::FileMap* file_map = new android::FileMap;
    333   const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
    334   if (!success) {
    335     file_map->release();
    336     return NULL;
    337   }
    338 
    339   return file_map;
    340 }
    341 
    342 static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
    343   static const uint32_t kBufSize = 32768;
    344   uint8_t buf[kBufSize];
    345 
    346   uint32_t count = 0;
    347   uint64_t crc = 0;
    348   while (count < length) {
    349     uint32_t remaining = length - count;
    350 
    351     // Safe conversion because kBufSize is narrow enough for a 32 bit signed
    352     // value.
    353     ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
    354     ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
    355 
    356     if (actual != get_size) {
    357       ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
    358       return kIoError;
    359     }
    360 
    361     memcpy(begin + count, buf, get_size);
    362     crc = crc32(crc, buf, get_size);
    363     count += get_size;
    364   }
    365 
    366   *crc_out = crc;
    367 
    368   return 0;
    369 }
    370 
    371 /*
    372  * Round up to the next highest power of 2.
    373  *
    374  * Found on http://graphics.stanford.edu/~seander/bithacks.html.
    375  */
    376 static uint32_t RoundUpPower2(uint32_t val) {
    377   val--;
    378   val |= val >> 1;
    379   val |= val >> 2;
    380   val |= val >> 4;
    381   val |= val >> 8;
    382   val |= val >> 16;
    383   val++;
    384 
    385   return val;
    386 }
    387 
    388 static uint32_t ComputeHash(const char* str, uint16_t len) {
    389   uint32_t hash = 0;
    390 
    391   while (len--) {
    392     hash = hash * 31 + *str++;
    393   }
    394 
    395   return hash;
    396 }
    397 
    398 /*
    399  * Convert a ZipEntry to a hash table index, verifying that it's in a
    400  * valid range.
    401  */
    402 static int64_t EntryToIndex(const ZipEntryName* hash_table,
    403                             const uint32_t hash_table_size,
    404                             const char* name, uint16_t length) {
    405   const uint32_t hash = ComputeHash(name, length);
    406 
    407   // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
    408   uint32_t ent = hash & (hash_table_size - 1);
    409   while (hash_table[ent].name != NULL) {
    410     if (hash_table[ent].name_length == length &&
    411         memcmp(hash_table[ent].name, name, length) == 0) {
    412       return ent;
    413     }
    414 
    415     ent = (ent + 1) & (hash_table_size - 1);
    416   }
    417 
    418   ALOGV("Zip: Unable to find entry %.*s", length, name);
    419   return kEntryNotFound;
    420 }
    421 
    422 /*
    423  * Add a new entry to the hash table.
    424  */
    425 static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
    426                          const char* name, uint16_t length) {
    427   const uint64_t hash = ComputeHash(name, length);
    428   uint32_t ent = hash & (hash_table_size - 1);
    429 
    430   /*
    431    * We over-allocated the table, so we're guaranteed to find an empty slot.
    432    * Further, we guarantee that the hashtable size is not 0.
    433    */
    434   while (hash_table[ent].name != NULL) {
    435     if (hash_table[ent].name_length == length &&
    436         memcmp(hash_table[ent].name, name, length) == 0) {
    437       // We've found a duplicate entry. We don't accept it
    438       ALOGW("Zip: Found duplicate entry %.*s", length, name);
    439       return kDuplicateEntry;
    440     }
    441     ent = (ent + 1) & (hash_table_size - 1);
    442   }
    443 
    444   hash_table[ent].name = name;
    445   hash_table[ent].name_length = length;
    446   return 0;
    447 }
    448 
    449 static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
    450                                     ZipArchive* archive, off64_t file_length,
    451                                     off64_t read_amount, uint8_t* scan_buffer) {
    452   const off64_t search_start = file_length - read_amount;
    453 
    454   if (lseek64(fd, search_start, SEEK_SET) != search_start) {
    455     ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
    456           strerror(errno));
    457     return kIoError;
    458   }
    459   ssize_t actual = TEMP_FAILURE_RETRY(
    460       read(fd, scan_buffer, static_cast<size_t>(read_amount)));
    461   if (actual != static_cast<ssize_t>(read_amount)) {
    462     ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
    463           strerror(errno));
    464     return kIoError;
    465   }
    466 
    467   /*
    468    * Scan backward for the EOCD magic.  In an archive without a trailing
    469    * comment, we'll find it on the first try.  (We may want to consider
    470    * doing an initial minimal read; if we don't find it, retry with a
    471    * second read as above.)
    472    */
    473   int i = read_amount - sizeof(EocdRecord);
    474   for (; i >= 0; i--) {
    475     if (scan_buffer[i] == 0x50 &&
    476         ((*reinterpret_cast<uint32_t*>(&scan_buffer[i])) == EocdRecord::kSignature)) {
    477       ALOGV("+++ Found EOCD at buf+%d", i);
    478       break;
    479     }
    480   }
    481   if (i < 0) {
    482     ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
    483     return kInvalidFile;
    484   }
    485 
    486   const off64_t eocd_offset = search_start + i;
    487   const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
    488   /*
    489    * Verify that there's no trailing space at the end of the central directory
    490    * and its comment.
    491    */
    492   const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
    493       + eocd->comment_length;
    494   if (calculated_length != file_length) {
    495     ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
    496           static_cast<int64_t>(file_length - calculated_length));
    497     return kInvalidFile;
    498   }
    499 
    500   /*
    501    * Grab the CD offset and size, and the number of entries in the
    502    * archive and verify that they look reasonable.
    503    */
    504   if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
    505     ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
    506         eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
    507     return kInvalidOffset;
    508   }
    509   if (eocd->num_records == 0) {
    510     ALOGW("Zip: empty archive?");
    511     return kEmptyArchive;
    512   }
    513 
    514   ALOGV("+++ num_entries=%" PRIu32 "dir_size=%" PRIu32 " dir_offset=%" PRIu32,
    515         eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
    516 
    517   /*
    518    * It all looks good.  Create a mapping for the CD, and set the fields
    519    * in archive.
    520    */
    521   android::FileMap* map = MapFileSegment(fd,
    522       static_cast<off64_t>(eocd->cd_start_offset),
    523       static_cast<size_t>(eocd->cd_size),
    524       true /* read only */, debug_file_name);
    525   if (map == NULL) {
    526     archive->directory_map = NULL;
    527     return kMmapFailed;
    528   }
    529 
    530   archive->directory_map = map;
    531   archive->num_entries = eocd->num_records;
    532   archive->directory_offset = eocd->cd_start_offset;
    533 
    534   return 0;
    535 }
    536 
    537 /*
    538  * Find the zip Central Directory and memory-map it.
    539  *
    540  * On success, returns 0 after populating fields from the EOCD area:
    541  *   directory_offset
    542  *   directory_map
    543  *   num_entries
    544  */
    545 static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
    546                                    ZipArchive* archive) {
    547 
    548   // Test file length. We use lseek64 to make sure the file
    549   // is small enough to be a zip file (Its size must be less than
    550   // 0xffffffff bytes).
    551   off64_t file_length = lseek64(fd, 0, SEEK_END);
    552   if (file_length == -1) {
    553     ALOGV("Zip: lseek on fd %d failed", fd);
    554     return kInvalidFile;
    555   }
    556 
    557   if (file_length > (off64_t) 0xffffffff) {
    558     ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
    559     return kInvalidFile;
    560   }
    561 
    562   if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
    563     ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
    564     return kInvalidFile;
    565   }
    566 
    567   /*
    568    * Perform the traditional EOCD snipe hunt.
    569    *
    570    * We're searching for the End of Central Directory magic number,
    571    * which appears at the start of the EOCD block.  It's followed by
    572    * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
    573    * need to read the last part of the file into a buffer, dig through
    574    * it to find the magic number, parse some values out, and use those
    575    * to determine the extent of the CD.
    576    *
    577    * We start by pulling in the last part of the file.
    578    */
    579   off64_t read_amount = kMaxEOCDSearch;
    580   if (file_length < read_amount) {
    581     read_amount = file_length;
    582   }
    583 
    584   uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
    585   int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
    586                                         file_length, read_amount, scan_buffer);
    587 
    588   free(scan_buffer);
    589   return result;
    590 }
    591 
    592 /*
    593  * Parses the Zip archive's Central Directory.  Allocates and populates the
    594  * hash table.
    595  *
    596  * Returns 0 on success.
    597  */
    598 static int32_t ParseZipArchive(ZipArchive* archive) {
    599   int32_t result = -1;
    600   const uint8_t* const cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
    601   const size_t cd_length = archive->directory_map->getDataLength();
    602   const uint16_t num_entries = archive->num_entries;
    603 
    604   /*
    605    * Create hash table.  We have a minimum 75% load factor, possibly as
    606    * low as 50% after we round off to a power of 2.  There must be at
    607    * least one unused entry to avoid an infinite loop during creation.
    608    */
    609   archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
    610   archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
    611       sizeof(ZipEntryName));
    612 
    613   /*
    614    * Walk through the central directory, adding entries to the hash
    615    * table and verifying values.
    616    */
    617   const uint8_t* const cd_end = cd_ptr + cd_length;
    618   const uint8_t* ptr = cd_ptr;
    619   for (uint16_t i = 0; i < num_entries; i++) {
    620     const CentralDirectoryRecord* cdr =
    621         reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    622     if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
    623       ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
    624       goto bail;
    625     }
    626 
    627     if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
    628       ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
    629       goto bail;
    630     }
    631 
    632     const off64_t local_header_offset = cdr->local_file_header_offset;
    633     if (local_header_offset >= archive->directory_offset) {
    634       ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i);
    635       goto bail;
    636     }
    637 
    638     const uint16_t file_name_length = cdr->file_name_length;
    639     const uint16_t extra_length = cdr->extra_field_length;
    640     const uint16_t comment_length = cdr->comment_length;
    641     const char* file_name = reinterpret_cast<const char*>(ptr + sizeof(CentralDirectoryRecord));
    642 
    643     /* check that file name doesn't contain \0 character */
    644     if (memchr(file_name, 0, file_name_length) != NULL) {
    645       ALOGW("Zip: entry name can't contain \\0 character");
    646       goto bail;
    647     }
    648 
    649     /* add the CDE filename to the hash table */
    650     const int add_result = AddToHash(archive->hash_table,
    651         archive->hash_table_size, file_name, file_name_length);
    652     if (add_result) {
    653       ALOGW("Zip: Error adding entry to hash table %d", add_result);
    654       result = add_result;
    655       goto bail;
    656     }
    657 
    658     ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
    659     if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
    660       ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
    661           ptr - cd_ptr, cd_length, i);
    662       goto bail;
    663     }
    664   }
    665   ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
    666 
    667   result = 0;
    668 
    669 bail:
    670   return result;
    671 }
    672 
    673 static int32_t OpenArchiveInternal(ZipArchive* archive,
    674                                    const char* debug_file_name) {
    675   int32_t result = -1;
    676   if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
    677     return result;
    678   }
    679 
    680   if ((result = ParseZipArchive(archive))) {
    681     return result;
    682   }
    683 
    684   return 0;
    685 }
    686 
    687 int32_t OpenArchiveFd(int fd, const char* debug_file_name,
    688                       ZipArchiveHandle* handle) {
    689   ZipArchive* archive = new ZipArchive(fd);
    690   *handle = archive;
    691   return OpenArchiveInternal(archive, debug_file_name);
    692 }
    693 
    694 int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
    695   const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
    696   ZipArchive* archive = new ZipArchive(fd);
    697   *handle = archive;
    698 
    699   if (fd < 0) {
    700     ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
    701     return kIoError;
    702   }
    703   return OpenArchiveInternal(archive, fileName);
    704 }
    705 
    706 /*
    707  * Close a ZipArchive, closing the file and freeing the contents.
    708  */
    709 void CloseArchive(ZipArchiveHandle handle) {
    710   ZipArchive* archive = (ZipArchive*) handle;
    711   ALOGV("Closing archive %p", archive);
    712   delete archive;
    713 }
    714 
    715 static int32_t UpdateEntryFromDataDescriptor(int fd,
    716                                              ZipEntry *entry) {
    717   uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
    718   ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
    719   if (actual != sizeof(ddBuf)) {
    720     return kIoError;
    721   }
    722 
    723   const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
    724   const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
    725   const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
    726 
    727   entry->crc32 = descriptor->crc32;
    728   entry->compressed_length = descriptor->compressed_size;
    729   entry->uncompressed_length = descriptor->uncompressed_size;
    730 
    731   return 0;
    732 }
    733 
    734 // Attempts to read |len| bytes into |buf| at offset |off|.
    735 //
    736 // This method uses pread64 on platforms that support it and
    737 // lseek64 + read on platforms that don't. This implies that
    738 // callers should not rely on the |fd| offset being incremented
    739 // as a side effect of this call.
    740 static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
    741                                    off64_t off) {
    742 #ifdef HAVE_PREAD
    743   return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
    744 #else
    745   // The only supported platform that doesn't support pread at the moment
    746   // is Windows. Only recent versions of windows support unix like forks,
    747   // and even there the semantics are quite different.
    748   if (lseek64(fd, off, SEEK_SET) != off) {
    749     ALOGW("Zip: failed seek to offset %" PRId64, off);
    750     return kIoError;
    751   }
    752 
    753   return TEMP_FAILURE_RETRY(read(fd, buf, len));
    754 #endif  // HAVE_PREAD
    755 }
    756 
    757 static int32_t FindEntry(const ZipArchive* archive, const int ent,
    758                          ZipEntry* data) {
    759   const uint16_t nameLen = archive->hash_table[ent].name_length;
    760   const char* name = archive->hash_table[ent].name;
    761 
    762   // Recover the start of the central directory entry from the filename
    763   // pointer.  The filename is the first entry past the fixed-size data,
    764   // so we can just subtract back from that.
    765   const uint8_t* ptr = reinterpret_cast<const uint8_t*>(name);
    766   ptr -= sizeof(CentralDirectoryRecord);
    767 
    768   // This is the base of our mmapped region, we have to sanity check that
    769   // the name that's in the hash table is a pointer to a location within
    770   // this mapped region.
    771   const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
    772     archive->directory_map->getDataPtr());
    773   if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
    774     ALOGW("Zip: Invalid entry pointer");
    775     return kInvalidOffset;
    776   }
    777 
    778   const CentralDirectoryRecord *cdr =
    779       reinterpret_cast<const CentralDirectoryRecord*>(ptr);
    780 
    781   // The offset of the start of the central directory in the zipfile.
    782   // We keep this lying around so that we can sanity check all our lengths
    783   // and our per-file structures.
    784   const off64_t cd_offset = archive->directory_offset;
    785 
    786   // Fill out the compression method, modification time, crc32
    787   // and other interesting attributes from the central directory. These
    788   // will later be compared against values from the local file header.
    789   data->method = cdr->compression_method;
    790   data->mod_time = cdr->last_mod_time;
    791   data->crc32 = cdr->crc32;
    792   data->compressed_length = cdr->compressed_size;
    793   data->uncompressed_length = cdr->uncompressed_size;
    794 
    795   // Figure out the local header offset from the central directory. The
    796   // actual file data will begin after the local header and the name /
    797   // extra comments.
    798   const off64_t local_header_offset = cdr->local_file_header_offset;
    799   if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
    800     ALOGW("Zip: bad local hdr offset in zip");
    801     return kInvalidOffset;
    802   }
    803 
    804   uint8_t lfh_buf[sizeof(LocalFileHeader)];
    805   ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
    806                                  local_header_offset);
    807   if (actual != sizeof(lfh_buf)) {
    808     ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset);
    809     return kIoError;
    810   }
    811 
    812   const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
    813 
    814   if (lfh->lfh_signature != LocalFileHeader::kSignature) {
    815     ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
    816         static_cast<int64_t>(local_header_offset));
    817     return kInvalidOffset;
    818   }
    819 
    820   // Paranoia: Match the values specified in the local file header
    821   // to those specified in the central directory.
    822   if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
    823     data->has_data_descriptor = 0;
    824     if (data->compressed_length != lfh->compressed_size
    825         || data->uncompressed_length != lfh->uncompressed_size
    826         || data->crc32 != lfh->crc32) {
    827       ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
    828         ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
    829         data->compressed_length, data->uncompressed_length, data->crc32,
    830         lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
    831       return kInconsistentInformation;
    832     }
    833   } else {
    834     data->has_data_descriptor = 1;
    835   }
    836 
    837   // Check that the local file header name matches the declared
    838   // name in the central directory.
    839   if (lfh->file_name_length == nameLen) {
    840     const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
    841     if (name_offset + lfh->file_name_length >= cd_offset) {
    842       ALOGW("Zip: Invalid declared length");
    843       return kInvalidOffset;
    844     }
    845 
    846     uint8_t* name_buf = (uint8_t*) malloc(nameLen);
    847     ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
    848                                   name_offset);
    849 
    850     if (actual != nameLen) {
    851       ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset);
    852       free(name_buf);
    853       return kIoError;
    854     }
    855 
    856     if (memcmp(name, name_buf, nameLen)) {
    857       free(name_buf);
    858       return kInconsistentInformation;
    859     }
    860 
    861     free(name_buf);
    862   } else {
    863     ALOGW("Zip: lfh name did not match central directory.");
    864     return kInconsistentInformation;
    865   }
    866 
    867   const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
    868       + lfh->file_name_length + lfh->extra_field_length;
    869   if (data_offset > cd_offset) {
    870     ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset);
    871     return kInvalidOffset;
    872   }
    873 
    874   if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
    875     ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
    876       (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset);
    877     return kInvalidOffset;
    878   }
    879 
    880   if (data->method == kCompressStored &&
    881     (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
    882      ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
    883        (int64_t)data_offset, data->uncompressed_length, (int64_t)cd_offset);
    884      return kInvalidOffset;
    885   }
    886 
    887   data->offset = data_offset;
    888   return 0;
    889 }
    890 
// Iteration state handed back to callers of StartIteration() as an opaque
// cookie and consumed by Next().
struct IterationHandle {
  // Hash table slot at which the next call to Next() resumes its scan.
  uint32_t position;
  // Optional name prefix to filter entries on; NULL means "match every
  // entry". The pointer is stored as passed in, the string is not copied.
  const char* prefix;
  // Length of |prefix| in bytes. Only meaningful when |prefix| is non-NULL.
  uint16_t prefix_len;
  // The archive being iterated over.
  ZipArchive* archive;
};
    897 
    898 int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
    899   ZipArchive* archive = (ZipArchive *) handle;
    900 
    901   if (archive == NULL || archive->hash_table == NULL) {
    902     ALOGW("Zip: Invalid ZipArchiveHandle");
    903     return kInvalidHandle;
    904   }
    905 
    906   IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
    907   cookie->position = 0;
    908   cookie->prefix = prefix;
    909   cookie->archive = archive;
    910   if (prefix != NULL) {
    911     cookie->prefix_len = strlen(prefix);
    912   }
    913 
    914   *cookie_ptr = cookie ;
    915   return 0;
    916 }
    917 
    918 int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
    919                   ZipEntry* data) {
    920   const ZipArchive* archive = (ZipArchive*) handle;
    921   const int nameLen = strlen(entryName);
    922   if (nameLen == 0 || nameLen > 65535) {
    923     ALOGW("Zip: Invalid filename %s", entryName);
    924     return kInvalidEntryName;
    925   }
    926 
    927   const int64_t ent = EntryToIndex(archive->hash_table,
    928     archive->hash_table_size, entryName, nameLen);
    929 
    930   if (ent < 0) {
    931     ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
    932     return ent;
    933   }
    934 
    935   return FindEntry(archive, ent, data);
    936 }
    937 
    938 int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
    939   IterationHandle* handle = (IterationHandle *) cookie;
    940   if (handle == NULL) {
    941     return kInvalidHandle;
    942   }
    943 
    944   ZipArchive* archive = handle->archive;
    945   if (archive == NULL || archive->hash_table == NULL) {
    946     ALOGW("Zip: Invalid ZipArchiveHandle");
    947     return kInvalidHandle;
    948   }
    949 
    950   const uint32_t currentOffset = handle->position;
    951   const uint32_t hash_table_length = archive->hash_table_size;
    952   const ZipEntryName *hash_table = archive->hash_table;
    953 
    954   for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
    955     if (hash_table[i].name != NULL &&
    956         (handle->prefix == NULL ||
    957          (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
    958       handle->position = (i + 1);
    959       const int error = FindEntry(archive, i, data);
    960       if (!error) {
    961         name->name = hash_table[i].name;
    962         name->name_length = hash_table[i].name_length;
    963       }
    964 
    965       return error;
    966     }
    967   }
    968 
    969   handle->position = 0;
    970   return kIterationEnd;
    971 }
    972 
    973 static int32_t InflateToFile(int fd, const ZipEntry* entry,
    974                              uint8_t* begin, uint32_t length,
    975                              uint64_t* crc_out) {
    976   int32_t result = -1;
    977   const uint32_t kBufSize = 32768;
    978   uint8_t read_buf[kBufSize];
    979   uint8_t write_buf[kBufSize];
    980   z_stream zstream;
    981   int zerr;
    982 
    983   /*
    984    * Initialize the zlib stream struct.
    985    */
    986   memset(&zstream, 0, sizeof(zstream));
    987   zstream.zalloc = Z_NULL;
    988   zstream.zfree = Z_NULL;
    989   zstream.opaque = Z_NULL;
    990   zstream.next_in = NULL;
    991   zstream.avail_in = 0;
    992   zstream.next_out = (Bytef*) write_buf;
    993   zstream.avail_out = kBufSize;
    994   zstream.data_type = Z_UNKNOWN;
    995 
    996   /*
    997    * Use the undocumented "negative window bits" feature to tell zlib
    998    * that there's no zlib header waiting for it.
    999    */
   1000   zerr = inflateInit2(&zstream, -MAX_WBITS);
   1001   if (zerr != Z_OK) {
   1002     if (zerr == Z_VERSION_ERROR) {
   1003       ALOGE("Installed zlib is not compatible with linked version (%s)",
   1004         ZLIB_VERSION);
   1005     } else {
   1006       ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
   1007     }
   1008 
   1009     return kZlibError;
   1010   }
   1011 
   1012   const uint32_t uncompressed_length = entry->uncompressed_length;
   1013 
   1014   uint32_t compressed_length = entry->compressed_length;
   1015   uint32_t write_count = 0;
   1016   do {
   1017     /* read as much as we can */
   1018     if (zstream.avail_in == 0) {
   1019       const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
   1020       const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
   1021       if (actual != getSize) {
   1022         ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
   1023         result = kIoError;
   1024         goto z_bail;
   1025       }
   1026 
   1027       compressed_length -= getSize;
   1028 
   1029       zstream.next_in = read_buf;
   1030       zstream.avail_in = getSize;
   1031     }
   1032 
   1033     /* uncompress the data */
   1034     zerr = inflate(&zstream, Z_NO_FLUSH);
   1035     if (zerr != Z_OK && zerr != Z_STREAM_END) {
   1036       ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
   1037           zerr, zstream.next_in, zstream.avail_in,
   1038           zstream.next_out, zstream.avail_out);
   1039       result = kZlibError;
   1040       goto z_bail;
   1041     }
   1042 
   1043     /* write when we're full or when we're done */
   1044     if (zstream.avail_out == 0 ||
   1045       (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
   1046       const size_t write_size = zstream.next_out - write_buf;
   1047       // The file might have declared a bogus length.
   1048       if (write_size + write_count > length) {
   1049         goto z_bail;
   1050       }
   1051       memcpy(begin + write_count, write_buf, write_size);
   1052       write_count += write_size;
   1053 
   1054       zstream.next_out = write_buf;
   1055       zstream.avail_out = kBufSize;
   1056     }
   1057   } while (zerr == Z_OK);
   1058 
   1059   assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
   1060 
   1061   // stream.adler holds the crc32 value for such streams.
   1062   *crc_out = zstream.adler;
   1063 
   1064   if (zstream.total_out != uncompressed_length || compressed_length != 0) {
   1065     ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
   1066         zstream.total_out, uncompressed_length);
   1067     result = kInconsistentInformation;
   1068     goto z_bail;
   1069   }
   1070 
   1071   result = 0;
   1072 
   1073 z_bail:
   1074   inflateEnd(&zstream);    /* free up any allocated structures */
   1075 
   1076   return result;
   1077 }
   1078 
   1079 int32_t ExtractToMemory(ZipArchiveHandle handle,
   1080                         ZipEntry* entry, uint8_t* begin, uint32_t size) {
   1081   ZipArchive* archive = (ZipArchive*) handle;
   1082   const uint16_t method = entry->method;
   1083   off64_t data_offset = entry->offset;
   1084 
   1085   if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
   1086     ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset);
   1087     return kIoError;
   1088   }
   1089 
   1090   // this should default to kUnknownCompressionMethod.
   1091   int32_t return_value = -1;
   1092   uint64_t crc = 0;
   1093   if (method == kCompressStored) {
   1094     return_value = CopyFileToFile(archive->fd, begin, size, &crc);
   1095   } else if (method == kCompressDeflated) {
   1096     return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
   1097   }
   1098 
   1099   if (!return_value && entry->has_data_descriptor) {
   1100     return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
   1101     if (return_value) {
   1102       return return_value;
   1103     }
   1104   }
   1105 
   1106   // TODO: Fix this check by passing the right flags to inflate2 so that
   1107   // it calculates the CRC for us.
   1108   if (entry->crc32 != crc && false) {
   1109     ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
   1110     return kInconsistentInformation;
   1111   }
   1112 
   1113   return return_value;
   1114 }
   1115 
   1116 int32_t ExtractEntryToFile(ZipArchiveHandle handle,
   1117                            ZipEntry* entry, int fd) {
   1118   const int32_t declared_length = entry->uncompressed_length;
   1119 
   1120   const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
   1121   if (current_offset == -1) {
   1122     ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
   1123           strerror(errno));
   1124     return kIoError;
   1125   }
   1126 
   1127   int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
   1128   if (result == -1) {
   1129     ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
   1130           (int64_t)(declared_length + current_offset), strerror(errno));
   1131     return kIoError;
   1132   }
   1133 
   1134   // Don't attempt to map a region of length 0. We still need the
   1135   // ftruncate() though, since the API guarantees that we will truncate
   1136   // the file to the end of the uncompressed output.
   1137   if (declared_length == 0) {
   1138       return 0;
   1139   }
   1140 
   1141   android::FileMap* map  = MapFileSegment(fd, current_offset, declared_length,
   1142                                           false, kTempMappingFileName);
   1143   if (map == NULL) {
   1144     return kMmapFailed;
   1145   }
   1146 
   1147   const int32_t error = ExtractToMemory(handle, entry,
   1148                                         reinterpret_cast<uint8_t*>(map->getDataPtr()),
   1149                                         map->getDataLength());
   1150   map->release();
   1151   return error;
   1152 }
   1153 
   1154 const char* ErrorCodeString(int32_t error_code) {
   1155   if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
   1156     return kErrorMessages[error_code * -1];
   1157   }
   1158 
   1159   return kErrorMessages[0];
   1160 }
   1161 
   1162 int GetFileDescriptor(const ZipArchiveHandle handle) {
   1163   return ((ZipArchive*) handle)->fd;
   1164 }
   1165 
   1166