Home | History | Annotate | Download | only in ijar
      1 // Copyright 2007 Alan Donovan. All rights reserved.
      2 //
      3 // Author: Alan Donovan <adonovan (at) google.com>
      4 //
      5 // Licensed under the Apache License, Version 2.0 (the "License");
      6 // you may not use this file except in compliance with the License.
      7 // You may obtain a copy of the License at
      8 //
      9 //    http://www.apache.org/licenses/LICENSE-2.0
     10 //
     11 // Unless required by applicable law or agreed to in writing, software
     12 // distributed under the License is distributed on an "AS IS" BASIS,
     13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 // See the License for the specific language governing permissions and
     15 // limitations under the License.
     16 //
     17 // zip.cc -- .zip (.jar) file reading/writing routines.
     18 //
     19 
     20 // See README.txt for details.
     21 //
     22 // See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
     23 // for definition of PKZIP file format.
     24 
     25 #define _FILE_OFFSET_BITS 64  // Support zip files larger than 2GB
     26 
     27 #include <errno.h>
     28 #include <fcntl.h>
     29 #include <stddef.h>
     30 #include <stdint.h>
     31 #include <stdarg.h>
     32 #include <stdio.h>
     33 #include <stdlib.h>
     34 #include <string.h>
     35 #include <sys/mman.h>
     36 #include <unistd.h>
     37 #include <limits.h>
     38 #include <limits>
     39 #include <vector>
     40 
     41 #include "zip.h"
     42 #include <zlib.h>
     43 
// Magic numbers that introduce the individual PKZIP records handled in this
// file. They are read and written as little-endian 32-bit values.
#define LOCAL_FILE_HEADER_SIGNATURE           0x04034b50
#define CENTRAL_FILE_HEADER_SIGNATURE         0x02014b50
#define END_OF_CENTRAL_DIR_SIGNATURE          0x06054b50
#define DATA_DESCRIPTOR_SIGNATURE             0x08074b50

// version to extract: 1.0 - default value from APPNOTE.TXT.
// Output JAR files contain no extra ZIP features, so this is enough.
#define ZIP_VERSION_TO_EXTRACT                10
// The only two compression methods accepted when reading a local file header.
#define COMPRESSION_METHOD_STORED             0   // no compression
#define COMPRESSION_METHOD_DEFLATED           8

// General purpose bit flags understood by the reader; a local file header
// with any other bit set is rejected as unsupported.
// Bit 3: when set, the reader expects a data descriptor after the entry data.
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3)
// Bit 11.
#define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11)
// Bits 1 and 2.
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1))
// Union of all flag bits listed above.
#define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \
  (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \
  | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \
  | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED)
     62 
     63 namespace devtools_ijar {
// In the absence of ZIP64 support, zip files are limited to 4GB.
// http://www.info-zip.org/FAQ.html#limits
// The limit is expressed as the largest value of an unsigned 32-bit integer.
static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max();
     67 
// Forward declaration; the definition appears further down in this file.
// Parses the central directory record at p, advances p, and reports the
// record's sizes, (possibly truncated) file name, external attributes and
// local header offset through the output parameters. Returns false when p
// does not point at a central file header signature.
static bool ProcessCentralDirEntry(const u1 *&p,
                                   size_t *compressed_size,
                                   size_t *uncompressed_size,
                                   char *filename,
                                   size_t filename_size,
                                   u4 *attr,
                                   u4 *offset);
     75 
     76 //
     77 // A class representing a ZipFile for reading. Its public API is exposed
     78 // using the ZipExtractor abstract class.
     79 //
     80 class InputZipFile : public ZipExtractor {
     81  public:
     82   InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length,
     83                off_t in_offset, const u1* zipdata_in, const u1* central_dir);
     84   virtual ~InputZipFile();
     85 
     86   virtual const char* GetError() {
     87     if (errmsg[0] == 0) {
     88       return NULL;
     89     }
     90     return errmsg;
     91   }
     92 
     93   virtual bool ProcessNext();
     94   virtual void Reset();
     95   virtual size_t GetSize() {
     96     return in_length_;
     97   }
     98 
     99   virtual u8 CalculateOutputLength();
    100 
    101  private:
    102   ZipExtractorProcessor *processor;
    103 
    104   int fd_in;  // Input file descripor
    105 
    106   // InputZipFile is responsible for maintaining the following
    107   // pointers. They are allocated by the Create() method before
    108   // the object is actually created using mmap.
    109   const u1 * const zipdata_in_;   // start of input file mmap
    110   const u1 * zipdata_in_mapped_;  // start of still mapped region
    111   const u1 * const central_dir_;  // central directory in input file
    112 
    113   size_t in_length_;  // size of the input file
    114   size_t in_offset_;  // offset  the input file
    115 
    116   const u1 *p;  // input cursor
    117 
    118   const u1* central_dir_current_;  // central dir input cursor
    119 
    120   // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every
    121   // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is
    122   // not enough, we bail out. We only decompress class files, so they should
    123   // be smaller than 64K anyway, but we give a little leeway.
    124   // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the
    125   // ZIP. It is set to 128M here so we can uncompress the Bazel server with
    126   // this library.
    127   static const size_t INITIAL_BUFFER_SIZE = 256 * 1024;  // 256K
    128   static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024;
    129   static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024;
    130 
    131   // These metadata fields are the fields of the ZIP header of the file being
    132   // processed.
    133   u2 extract_version_;
    134   u2 general_purpose_bit_flag_;
    135   u2 compression_method_;
    136   u4 uncompressed_size_;
    137   u4 compressed_size_;
    138   u2 file_name_length_;
    139   u2 extra_field_length_;
    140   const u1 *file_name_;
    141   const u1 *extra_field_;
    142 
    143   // Administration of memory reserved for decompressed data. We use the same
    144   // buffer for each file to avoid some malloc()/free() calls and free the
    145   // memory only in the dtor. C-style memory management is used so that we
    146   // can call realloc.
    147   u1 *uncompressed_data_;
    148   size_t uncompressed_data_allocated_;
    149 
    150   // Copy of the last filename entry - Null-terminated.
    151   char filename[PATH_MAX];
    152   // The external file attribute field
    153   u4 attr;
    154 
    155   // last error
    156   char errmsg[4*PATH_MAX];
    157 
    158   int error(const char *fmt, ...) {
    159     va_list ap;
    160     va_start(ap, fmt);
    161     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
    162     va_end(ap);
    163     return -1;
    164   }
    165 
    166   // Check that at least n bytes remain in the input file, otherwise
    167   // abort with an error message.  "state" is the name of the field
    168   // we're about to read, for diagnostics.
    169   int EnsureRemaining(size_t n, const char *state) {
    170     size_t in_offset = p - zipdata_in_;
    171     size_t remaining = in_length_ - in_offset;
    172     if (n > remaining) {
    173       return error("Premature end of file (at offset %zd, state=%s); "
    174                    "expected %zd more bytes but found %zd.\n",
    175                    in_offset, state, n, remaining);
    176     }
    177     return 0;
    178   }
    179 
    180   // Read one entry from input zip file
    181   int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size);
    182 
    183   // Uncompress a file from the archive using zlib. The pointer returned
    184   // is owned by InputZipFile, so it must not be freed. Advances the input
    185   // cursor to the first byte after the compressed data.
    186   u1* UncompressFile();
    187 
    188   // Skip a file
    189   int SkipFile(const bool compressed);
    190 
    191   // Process a file
    192   int ProcessFile(const bool compressed);
    193 };
    194 
    195 //
    196 // A class implementing ZipBuilder that represent an open zip file for writing.
    197 //
    198 class OutputZipFile : public ZipBuilder {
    199  public:
    200   OutputZipFile(int fd, u1 * const zipdata_out) :
    201       fd_out(fd),
    202       zipdata_out_(zipdata_out),
    203       q(zipdata_out) {
    204     errmsg[0] = 0;
    205   }
    206 
    207   virtual const char* GetError() {
    208     if (errmsg[0] == 0) {
    209       return NULL;
    210     }
    211     return errmsg;
    212   }
    213 
    214   virtual ~OutputZipFile() { Finish(); }
    215   virtual u1* NewFile(const char* filename, const u4 attr);
    216   virtual int FinishFile(size_t filelength, bool compress = false,
    217                          bool compute_crc = false);
    218   virtual int WriteEmptyFile(const char *filename);
    219   virtual size_t GetSize() {
    220     return Offset(q);
    221   }
    222   virtual int GetNumberFiles() {
    223     return entries_.size();
    224   }
    225   virtual int Finish();
    226 
    227  private:
    228   struct LocalFileEntry {
    229     // Start of the local header (in the output buffer).
    230     size_t local_header_offset;
    231 
    232     // Sizes of the file entry
    233     size_t uncompressed_length;
    234     size_t compressed_length;
    235 
    236     // Compression method
    237     u2 compression_method;
    238 
    239     // CRC32
    240     u4 crc32;
    241 
    242     // external attributes field
    243     u4 external_attr;
    244 
    245     // Start/length of the file_name in the local header.
    246     u1 *file_name;
    247     u2 file_name_length;
    248 
    249     // Start/length of the extra_field in the local header.
    250     const u1 *extra_field;
    251     u2 extra_field_length;
    252   };
    253 
    254   int fd_out;  // file descriptor for the output file
    255 
    256   // OutputZipFile is responsible for maintaining the following
    257   // pointers. They are allocated by the Create() method before
    258   // the object is actually created using mmap.
    259   u1 * const zipdata_out_;        // start of output file mmap
    260   u1 *q;  // output cursor
    261 
    262   u1 *header_ptr;  // Current pointer to "compression method" entry.
    263 
    264   // List of entries to write the central directory
    265   std::vector<LocalFileEntry*> entries_;
    266 
    267   // last error
    268   char errmsg[4*PATH_MAX];
    269 
    270   int error(const char *fmt, ...) {
    271     va_list ap;
    272     va_start(ap, fmt);
    273     vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
    274     va_end(ap);
    275     return -1;
    276   }
    277 
    278   // Write the ZIP central directory structure for each local file
    279   // entry in "entries".
    280   void WriteCentralDirectory();
    281 
    282   // Returns the offset of the pointer relative to the start of the
    283   // output zip file.
    284   size_t Offset(const u1 *const x) {
    285     return x - zipdata_out_;
    286   }
    287 
    288   // Write ZIP file header in the output. Since the compressed size is not
    289   // known in advance, it must be recorded later. This method returns a pointer
    290   // to "compressed size" in the file header that should be passed to
    291   // WriteFileSizeInLocalFileHeader() later.
    292   u1* WriteLocalFileHeader(const char *filename, const u4 attr);
    293 
    294   // Fill in the "compressed size" and "uncompressed size" fields in a local
    295   // file header previously written by WriteLocalFileHeader().
    296   size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr,
    297                                         size_t out_length,
    298                                         bool compress = false,
    299                                         const u4 crc = 0);
    300 };
    301 
    302 //
    303 // Implementation of InputZipFile
    304 //
    305 bool InputZipFile::ProcessNext() {
    306   // Process the next entry in the central directory. Also make sure that the
    307   // content pointer is in sync.
    308   size_t compressed, uncompressed;
    309   u4 offset;
    310   if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed,
    311                               filename, PATH_MAX, &attr, &offset)) {
    312     return false;
    313   }
    314 
    315   // There might be an offset specified in the central directory that does
    316   // not match the file offset, if so, correct the pointer.
    317   if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) {
    318     p = zipdata_in_ + offset;
    319   }
    320 
    321   if (EnsureRemaining(4, "signature") < 0) {
    322     return false;
    323   }
    324   u4 signature = get_u4le(p);
    325   if (signature == LOCAL_FILE_HEADER_SIGNATURE) {
    326     if (ProcessLocalFileEntry(compressed, uncompressed) < 0) {
    327       return false;
    328     }
    329   } else {
    330     error("local file header signature for file %s not found\n", filename);
    331     return false;
    332   }
    333 
    334   return true;
    335 }
    336 
    337 int InputZipFile::ProcessLocalFileEntry(
    338     size_t compressed_size, size_t uncompressed_size) {
    339   if (EnsureRemaining(26, "extract_version") < 0) {
    340     return -1;
    341   }
    342   extract_version_ = get_u2le(p);
    343   general_purpose_bit_flag_ = get_u2le(p);
    344 
    345   if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) {
    346     return error("Unsupported value (0x%04x) in general purpose bit flag.\n",
    347                  general_purpose_bit_flag_);
    348   }
    349 
    350   compression_method_ = get_u2le(p);
    351 
    352   if (compression_method_ != COMPRESSION_METHOD_DEFLATED &&
    353       compression_method_ != COMPRESSION_METHOD_STORED) {
    354     return error("Unsupported compression method (%d).\n",
    355                  compression_method_);
    356   }
    357 
    358   // skip over: last_mod_file_time, last_mod_file_date, crc32
    359   p += 2 + 2 + 4;
    360   compressed_size_ = get_u4le(p);
    361   uncompressed_size_ = get_u4le(p);
    362   file_name_length_ = get_u2le(p);
    363   extra_field_length_ = get_u2le(p);
    364 
    365   if (EnsureRemaining(file_name_length_, "file_name") < 0) {
    366     return -1;
    367   }
    368   file_name_ = p;
    369   p += file_name_length_;
    370 
    371   if (EnsureRemaining(extra_field_length_, "extra_field") < 0) {
    372     return -1;
    373   }
    374   extra_field_ = p;
    375   p += extra_field_length_;
    376 
    377   bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED;
    378 
    379   // If the zip is compressed, compressed and uncompressed size members are
    380   // zero in the local file header. If not, check that they are the same as the
    381   // lengths from the central directory, otherwise, just believe the central
    382   // directory
    383   if (compressed_size_ == 0) {
    384     compressed_size_ = compressed_size;
    385   } else {
    386     if (compressed_size_ != compressed_size) {
    387       return error("central directory and file header inconsistent\n");
    388     }
    389   }
    390 
    391   if (uncompressed_size_ == 0) {
    392     uncompressed_size_ = uncompressed_size;
    393   } else {
    394     if (uncompressed_size_ != uncompressed_size) {
    395       return error("central directory and file header inconsistent\n");
    396     }
    397   }
    398 
    399   if (processor->Accept(filename, attr)) {
    400     if (ProcessFile(is_compressed) < 0) {
    401       return -1;
    402     }
    403   } else {
    404     if (SkipFile(is_compressed) < 0) {
    405       return -1;
    406     }
    407   }
    408 
    409   if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) {
    410     // Skip the data descriptor. Some implementations do not put the signature
    411     // here, so check if the next 4 bytes are a signature, and if so, skip the
    412     // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip
    413     // the next 8 bytes (because the value just read was the CRC).
    414     u4 signature = get_u4le(p);
    415     if (signature == DATA_DESCRIPTOR_SIGNATURE) {
    416       p += 4 * 3;
    417     } else {
    418       p += 4 * 2;
    419     }
    420   }
    421 
    422   if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) {
    423     munmap(const_cast<u1 *>(zipdata_in_mapped_), MAX_MAPPED_REGION);
    424     zipdata_in_mapped_ += MAX_MAPPED_REGION;
    425   }
    426 
    427   return 0;
    428 }
    429 
    430 int InputZipFile::SkipFile(const bool compressed) {
    431   if (!compressed) {
    432     // In this case, compressed_size_ == uncompressed_size_ (since the file is
    433     // uncompressed), so we can use either.
    434     if (compressed_size_ != uncompressed_size_) {
    435       return error("compressed size != uncompressed size, although the file "
    436                    "is uncompressed.\n");
    437     }
    438   }
    439 
    440   if (EnsureRemaining(compressed_size_, "file_data") < 0) {
    441     return -1;
    442   }
    443   p += compressed_size_;
    444   return 0;
    445 }
    446 
    447 u1* InputZipFile::UncompressFile() {
    448   size_t in_offset = p - zipdata_in_;
    449   size_t remaining = in_length_ - in_offset;
    450   z_stream stream;
    451 
    452   stream.zalloc = Z_NULL;
    453   stream.zfree = Z_NULL;
    454   stream.opaque = Z_NULL;
    455   stream.avail_in = remaining;
    456   stream.next_in = (Bytef *) p;
    457 
    458   int ret = inflateInit2(&stream, -MAX_WBITS);
    459   if (ret != Z_OK) {
    460     error("inflateInit: %d\n", ret);
    461     return NULL;
    462   }
    463 
    464   int uncompressed_until_now = 0;
    465 
    466   while (true) {
    467     stream.avail_out = uncompressed_data_allocated_ - uncompressed_until_now;
    468     stream.next_out = uncompressed_data_ + uncompressed_until_now;
    469     int old_avail_out = stream.avail_out;
    470 
    471     ret = inflate(&stream, Z_SYNC_FLUSH);
    472     int uncompressed_now = old_avail_out - stream.avail_out;
    473     uncompressed_until_now += uncompressed_now;
    474 
    475     switch (ret) {
    476       case Z_STREAM_END: {
    477         // zlib said that there is no more data to decompress.
    478 
    479         u1 *new_p = reinterpret_cast<u1*>(stream.next_in);
    480         compressed_size_ = new_p - p;
    481         uncompressed_size_ = uncompressed_until_now;
    482         p = new_p;
    483         inflateEnd(&stream);
    484         return uncompressed_data_;
    485       }
    486 
    487       case Z_OK: {
    488         // zlib said that there is no more room in the buffer allocated for
    489         // the decompressed data. Enlarge that buffer and try again.
    490 
    491         if (uncompressed_data_allocated_ == MAX_BUFFER_SIZE) {
    492           error("ijar does not support decompressing files "
    493                 "larger than %dMB.\n",
    494                 (int) (MAX_BUFFER_SIZE/(1024*1024)));
    495           return NULL;
    496         }
    497 
    498         uncompressed_data_allocated_ *= 2;
    499         if (uncompressed_data_allocated_ > MAX_BUFFER_SIZE) {
    500           uncompressed_data_allocated_ = MAX_BUFFER_SIZE;
    501         }
    502 
    503         uncompressed_data_ = reinterpret_cast<u1*>(
    504             realloc(uncompressed_data_, uncompressed_data_allocated_));
    505         break;
    506       }
    507 
    508       case Z_DATA_ERROR:
    509       case Z_BUF_ERROR:
    510       case Z_STREAM_ERROR:
    511       case Z_NEED_DICT:
    512       default: {
    513         error("zlib returned error code %d during inflate.\n", ret);
    514         return NULL;
    515       }
    516     }
    517   }
    518 }
    519 
    520 int InputZipFile::ProcessFile(const bool compressed) {
    521   const u1 *file_data;
    522   if (compressed) {
    523     file_data = UncompressFile();
    524     if (file_data == NULL) {
    525       return -1;
    526     }
    527   } else {
    528     // In this case, compressed_size_ == uncompressed_size_ (since the file is
    529     // uncompressed), so we can use either.
    530     if (compressed_size_ != uncompressed_size_) {
    531       return error("compressed size != uncompressed size, although the file "
    532                    "is uncompressed.\n");
    533     }
    534 
    535     if (EnsureRemaining(compressed_size_, "file_data") < 0) {
    536       return -1;
    537     }
    538     file_data = p;
    539     p += compressed_size_;
    540   }
    541   processor->Process(filename, attr, file_data, uncompressed_size_);
    542   return 0;
    543 }
    544 
    545 
    546 // Reads and returns some metadata of the next file from the central directory:
    547 // - compressed size
    548 // - uncompressed size
    549 // - whether the entry is a class file (to be included in the output).
    550 // Precondition: p points to the beginning of an entry in the central dir
    551 // Postcondition: p points to the beginning of the next entry in the central dir
    552 // Returns true if the central directory contains another file and false if not.
    553 // Of course, in the latter case, the size output variables are not changed.
    554 // Note that the central directory is always followed by another data structure
    555 // that has a signature, so parsing it this way is safe.
    556 static bool ProcessCentralDirEntry(
    557     const u1 *&p, size_t *compressed_size, size_t *uncompressed_size,
    558     char *filename, size_t filename_size, u4 *attr, u4 *offset) {
    559   u4 signature = get_u4le(p);
    560   if (signature != CENTRAL_FILE_HEADER_SIGNATURE) {
    561     return false;
    562   }
    563 
    564   p += 16;  // skip to 'compressed size' field
    565   *compressed_size = get_u4le(p);
    566   *uncompressed_size = get_u4le(p);
    567   u2 file_name_length = get_u2le(p);
    568   u2 extra_field_length = get_u2le(p);
    569   u2 file_comment_length = get_u2le(p);
    570   p += 4;  // skip to external file attributes field
    571   *attr = get_u4le(p);
    572   *offset = get_u4le(p);
    573   {
    574     size_t len = (file_name_length < filename_size)
    575       ? file_name_length
    576       : (filename_size - 1);
    577     memcpy(reinterpret_cast<void*>(filename), p, len);
    578     filename[len] = 0;
    579   }
    580   p += file_name_length;
    581   p += extra_field_length;
    582   p += file_comment_length;
    583   return true;
    584 }
    585 
    586 // Gives a maximum bound on the size of the interface JAR. Basically, adds
    587 // the difference between the compressed and uncompressed sizes to the size
    588 // of the input file.
    589 u8 InputZipFile::CalculateOutputLength() {
    590   const u1* current = central_dir_;
    591 
    592   u8 compressed_size = 0;
    593   u8 uncompressed_size = 0;
    594   u8 skipped_compressed_size = 0;
    595   u4 attr;
    596   u4 offset;
    597   char filename[PATH_MAX];
    598 
    599   while (true) {
    600     size_t file_compressed, file_uncompressed;
    601     if (!ProcessCentralDirEntry(current,
    602                                 &file_compressed, &file_uncompressed,
    603                                 filename, PATH_MAX, &attr, &offset)) {
    604       break;
    605     }
    606 
    607     if (processor->Accept(filename, attr)) {
    608       compressed_size += (u8) file_compressed;
    609       uncompressed_size += (u8) file_uncompressed;
    610     } else {
    611       skipped_compressed_size += file_compressed;
    612     }
    613   }
    614 
    615   // The worst case is when the output is simply the input uncompressed. The
    616   // metadata in the zip file will stay the same, so the file will grow by the
    617   // difference between the compressed and uncompressed sizes.
    618   return (u8) in_length_ - skipped_compressed_size
    619       + (uncompressed_size - compressed_size);
    620 }
    621 
    622 // Given the data in the zip file, returns the offset of the central directory
    623 // and the number of files contained in it.
    624 bool FindZipCentralDirectory(const u1* bytes, size_t in_length,
    625                              u4* offset, const u1** central_dir) {
    626   static const int MAX_COMMENT_LENGTH = 0xffff;
    627   static const int CENTRAL_DIR_LOCATOR_SIZE = 22;
    628   // Maximum distance of start of central dir locator from end of file
    629   static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE;
    630   const u1* last_pos_to_check = in_length < MAX_DELTA
    631       ? bytes
    632       : bytes + (in_length - MAX_DELTA);
    633   const u1* current;
    634   bool found = false;
    635 
    636   for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE;
    637        current >= last_pos_to_check;
    638        current-- ) {
    639     const u1* p = current;
    640     if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) {
    641       continue;
    642     }
    643 
    644     p += 16;  // skip to comment length field
    645     u2 comment_length = get_u2le(p);
    646 
    647     // Does the comment go exactly till the end of the file?
    648     if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE
    649         != bytes + in_length) {
    650       continue;
    651     }
    652 
    653     // Hooray, we found it!
    654     found = true;
    655     break;
    656   }
    657 
    658   if (!found) {
    659     fprintf(stderr, "file is invalid or corrupted (missing end of central "
    660                     "directory record)\n");
    661     return false;
    662   }
    663 
    664   const u1* end_of_central_dir = current;
    665   get_u4le(current);  // central directory locator signature, already checked
    666   u2 number_of_this_disk = get_u2le(current);
    667   u2 disk_with_central_dir = get_u2le(current);
    668   u2 central_dir_entries_on_this_disk = get_u2le(current);
    669   u2 central_dir_entries = get_u2le(current);
    670   u4 central_dir_size = get_u4le(current);
    671   u4 central_dir_offset = get_u4le(current);
    672   u2 file_comment_length = get_u2le(current);
    673   current += file_comment_length;  // set current to the end of the central dir
    674 
    675   if (number_of_this_disk != 0
    676     || disk_with_central_dir != 0
    677     || central_dir_entries_on_this_disk != central_dir_entries) {
    678     fprintf(stderr, "multi-disk JAR files are not supported\n");
    679     return false;
    680   }
    681 
    682   // Do not change output values before determining that they are OK.
    683   *offset = central_dir_offset;
    684   // Central directory start can then be used to determine the actual
    685   // starts of the zip file (which can be different in case of a non-zip
    686   // header like for auto-extractable binaries).
    687   *central_dir = end_of_central_dir - central_dir_size;
    688   return true;
    689 }
    690 
    691 void InputZipFile::Reset() {
    692   central_dir_current_ = central_dir_;
    693   zipdata_in_mapped_ = zipdata_in_;
    694   p = zipdata_in_ + in_offset_;
    695 }
    696 
    697 int ZipExtractor::ProcessAll() {
    698   while (ProcessNext()) {}
    699   if (GetError() != NULL) {
    700     return -1;
    701   }
    702   return 0;
    703 }
    704 
    705 ZipExtractor* ZipExtractor::Create(const char* filename,
    706                                    ZipExtractorProcessor *processor) {
    707   int fd_in = open(filename, O_RDONLY);
    708   if (fd_in < 0) {
    709     return NULL;
    710   }
    711 
    712   off_t length = lseek(fd_in, 0, SEEK_END);
    713   if (length < 0) {
    714     return NULL;
    715   }
    716 
    717   void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0);
    718   if (zipdata_in == MAP_FAILED) {
    719     return NULL;
    720   }
    721 
    722   u4 central_dir_offset;
    723   const u1 *central_dir = NULL;
    724 
    725   if (!devtools_ijar::FindZipCentralDirectory(
    726           static_cast<const u1*>(zipdata_in), length,
    727           &central_dir_offset, &central_dir)) {
    728     errno = EIO;  // we don't really have a good error number
    729     return NULL;
    730   }
    731   const u1 *zipdata_start = static_cast<const u1*>(zipdata_in);
    732   off_t offset = - static_cast<off_t>(zipdata_start
    733                                       + central_dir_offset
    734                                       - central_dir);
    735 
    736   return new InputZipFile(processor, fd_in, length, offset,
    737                           zipdata_start, central_dir);
    738 }
    739 
    740 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd,
    741                            off_t in_length, off_t in_offset,
    742                            const u1* zipdata_in, const u1* central_dir)
    743   : processor(processor), fd_in(fd),
    744     zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in),
    745     central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset),
    746     p(zipdata_in + in_offset), central_dir_current_(central_dir) {
    747   uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE;
    748   uncompressed_data_ =
    749     reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_));
    750   errmsg[0] = 0;
    751 }
    752 
    753 InputZipFile::~InputZipFile() {
    754   free(uncompressed_data_);
    755   close(fd_in);
    756 }
    757 
    758 
    759 //
    760 // Implementation of OutputZipFile
    761 //
    762 int OutputZipFile::WriteEmptyFile(const char *filename) {
    763   const u1* file_name = (const u1*) filename;
    764   size_t file_name_length = strlen(filename);
    765 
    766   LocalFileEntry *entry = new LocalFileEntry;
    767   entry->local_header_offset = Offset(q);
    768   entry->external_attr = 0;
    769   entry->crc32 = 0;
    770 
    771   // Output the ZIP local_file_header:
    772   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
    773   put_u2le(q, 10);  // extract_version
    774   put_u2le(q, 0);  // general_purpose_bit_flag
    775   put_u2le(q, 0);  // compression_method
    776   put_u2le(q, 0);  // last_mod_file_time
    777   put_u2le(q, 0);  // last_mod_file_date
    778   put_u4le(q, entry->crc32);  // crc32
    779   put_u4le(q, 0);  // compressed_size
    780   put_u4le(q, 0);  // uncompressed_size
    781   put_u2le(q, file_name_length);
    782   put_u2le(q, 0);  // extra_field_length
    783   put_n(q, file_name, file_name_length);
    784 
    785   entry->file_name_length = file_name_length;
    786   entry->extra_field_length = 0;
    787   entry->compressed_length = 0;
    788   entry->uncompressed_length = 0;
    789   entry->compression_method = 0;
    790   entry->extra_field = (const u1 *)"";
    791   entry->file_name = (u1*) strdup((const char *) file_name);
    792   entries_.push_back(entry);
    793 
    794   return 0;
    795 }
    796 
    797 void OutputZipFile::WriteCentralDirectory() {
    798   // central directory:
    799   const u1 *central_directory_start = q;
    800   for (size_t ii = 0; ii < entries_.size(); ++ii) {
    801     LocalFileEntry *entry = entries_[ii];
    802     put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE);
    803     put_u2le(q, 0);  // version made by
    804 
    805     put_u2le(q, ZIP_VERSION_TO_EXTRACT);  // version to extract
    806     put_u2le(q, 0);  // general purpose bit flag
    807     put_u2le(q, entry->compression_method);  // compression method:
    808     put_u2le(q, 0);                          // last_mod_file_time
    809     put_u2le(q, 0);  // last_mod_file_date
    810     put_u4le(q, entry->crc32);  // crc32
    811     put_u4le(q, entry->compressed_length);    // compressed_size
    812     put_u4le(q, entry->uncompressed_length);  // uncompressed_size
    813     put_u2le(q, entry->file_name_length);
    814     put_u2le(q, entry->extra_field_length);
    815 
    816     put_u2le(q, 0);  // file comment length
    817     put_u2le(q, 0);  // disk number start
    818     put_u2le(q, 0);  // internal file attributes
    819     put_u4le(q, entry->external_attr);  // external file attributes
    820     // relative offset of local header:
    821     put_u4le(q, entry->local_header_offset);
    822 
    823     put_n(q, entry->file_name, entry->file_name_length);
    824     put_n(q, entry->extra_field, entry->extra_field_length);
    825   }
    826   u4 central_directory_size = q - central_directory_start;
    827 
    828   put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE);
    829   put_u2le(q, 0);  // number of this disk
    830   put_u2le(q, 0);  // number of the disk with the start of the central directory
    831   put_u2le(q, entries_.size());  // # central dir entries on this disk
    832   put_u2le(q, entries_.size());  // total # entries in the central directory
    833   put_u4le(q, central_directory_size);  // size of the central directory
    834   put_u4le(q, Offset(central_directory_start));  // offset of start of central
    835                                                  // directory wrt starting disk
    836   put_u2le(q, 0);  // .ZIP file comment length
    837 }
    838 
    839 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) {
    840   off_t file_name_length_ = strlen(filename);
    841   LocalFileEntry *entry = new LocalFileEntry;
    842   entry->local_header_offset = Offset(q);
    843   entry->file_name_length = file_name_length_;
    844   entry->file_name = new u1[file_name_length_];
    845   entry->external_attr = attr;
    846   memcpy(entry->file_name, filename, file_name_length_);
    847   entry->extra_field_length = 0;
    848   entry->extra_field = (const u1 *)"";
    849   entry->crc32 = 0;
    850 
    851   // Output the ZIP local_file_header:
    852   put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
    853   put_u2le(q, ZIP_VERSION_TO_EXTRACT);     // version to extract
    854   put_u2le(q, 0);                          // general purpose bit flag
    855   u1 *header_ptr = q;
    856   put_u2le(q, COMPRESSION_METHOD_STORED);  // compression method = placeholder
    857   put_u2le(q, 0);                          // last_mod_file_time
    858   put_u2le(q, 0);                          // last_mod_file_date
    859   put_u4le(q, entry->crc32);               // crc32
    860   put_u4le(q, 0);  // compressed_size = placeholder
    861   put_u4le(q, 0);  // uncompressed_size = placeholder
    862   put_u2le(q, entry->file_name_length);
    863   put_u2le(q, entry->extra_field_length);
    864 
    865   put_n(q, entry->file_name, entry->file_name_length);
    866   put_n(q, entry->extra_field, entry->extra_field_length);
    867   entries_.push_back(entry);
    868 
    869   return header_ptr;
    870 }
    871 
    872 // Try to compress a file entry in memory using the deflate algorithm.
    873 // It will compress buf (of size length) unless the compressed size is bigger
    874 // than the input size. The result will overwrite the content of buf and the
    875 // final size is returned.
    876 size_t TryDeflate(u1 *buf, size_t length) {
    877   u1 *outbuf = reinterpret_cast<u1 *>(malloc(length));
    878   z_stream stream;
    879 
    880   // Initialize the z_stream strcut for reading from buf and wrinting in outbuf.
    881   stream.zalloc = Z_NULL;
    882   stream.zfree = Z_NULL;
    883   stream.opaque = Z_NULL;
    884   stream.total_in = length;
    885   stream.avail_in = length;
    886   stream.total_out = length;
    887   stream.avail_out = length;
    888   stream.next_in = buf;
    889   stream.next_out = outbuf;
    890 
    891   // deflateInit2 negative windows size prevent the zlib wrapper to be used.
    892   if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
    893                   -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
    894     // Failure to compress => return the buffer uncompressed
    895     free(outbuf);
    896     return length;
    897   }
    898 
    899   if (deflate(&stream, Z_FINISH) == Z_STREAM_END) {
    900     // Compression successful and fits in outbuf, let's copy the result in buf.
    901     length = stream.total_out;
    902     memcpy(buf, outbuf, length);
    903   }
    904 
    905   deflateEnd(&stream);
    906   free(outbuf);
    907 
    908   // Return the length of the resulting buffer
    909   return length;
    910 }
    911 
    912 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
    913                                                      size_t out_length,
    914                                                      bool compress,
    915                                                      const u4 crc) {
    916   size_t compressed_size = out_length;
    917   if (compress) {
    918     compressed_size = TryDeflate(q, out_length);
    919   }
    920   // compression method
    921   if (compressed_size < out_length) {
    922     put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
    923   } else {
    924     put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
    925   }
    926   header_ptr += 4;
    927   put_u4le(header_ptr, crc);              // crc32
    928   put_u4le(header_ptr, compressed_size);  // compressed_size
    929   put_u4le(header_ptr, out_length);       // uncompressed_size
    930   return compressed_size;
    931 }
    932 
    933 int OutputZipFile::Finish() {
    934   if (fd_out > 0) {
    935     WriteCentralDirectory();
    936     if (ftruncate(fd_out, GetSize()) < 0) {
    937       return error("ftruncate(fd_out, GetSize()): %s", strerror(errno));
    938     }
    939     if (close(fd_out) < 0) {
    940       return error("close(fd_out): %s", strerror(errno));
    941     }
    942     fd_out = -1;
    943   }
    944   return 0;
    945 }
    946 
    947 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
    948   header_ptr = WriteLocalFileHeader(filename, attr);
    949   return q;
    950 }
    951 
    952 int OutputZipFile::FinishFile(size_t filelength, bool compress,
    953                               bool compute_crc) {
    954   u4 crc = 0;
    955   if (compute_crc) {
    956     crc = crc32(crc, q, filelength);
    957   }
    958   size_t compressed_size =
    959       WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc);
    960   entries_.back()->crc32 = crc;
    961   entries_.back()->compressed_length = compressed_size;
    962   entries_.back()->uncompressed_length = filelength;
    963   if (compressed_size < filelength) {
    964     entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
    965   } else {
    966     entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
    967   }
    968   q += compressed_size;
    969   return 0;
    970 }
    971 
    972 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) {
    973   if (estimated_size > kMaximumOutputSize) {
    974     fprintf(stderr,
    975             "Uncompressed input jar has size %llu, "
    976             "which exceeds the maximum supported output size %llu.\n"
    977             "Assuming that ijar will be smaller and hoping for the best.\n",
    978             estimated_size, kMaximumOutputSize);
    979     estimated_size = kMaximumOutputSize;
    980   }
    981 
    982   int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644);
    983   if (fd_out < 0) {
    984     return NULL;
    985   }
    986 
    987   // Create mmap-able sparse file
    988   if (ftruncate(fd_out, estimated_size) < 0) {
    989     return NULL;
    990   }
    991 
    992   // Ensure that any buffer overflow in JarStripper will result in
    993   // SIGSEGV or SIGBUS by over-allocating beyond the end of the file.
    994   size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE),
    995                                 (u8) std::numeric_limits<size_t>::max());
    996 
    997   void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE,
    998                            MAP_SHARED, fd_out, 0);
    999   if (zipdata_out == MAP_FAILED) {
   1000     fprintf(stderr, "output_length=%llu\n", estimated_size);
   1001     return NULL;
   1002   }
   1003 
   1004   return new OutputZipFile(fd_out, (u1*) zipdata_out);
   1005 }
   1006 
   1007 u8 ZipBuilder::EstimateSize(char **files) {
   1008   struct stat statst;
   1009   // Digital signature field size = 6, End of central directory = 22, Total = 28
   1010   u8 size = 28;
   1011   // Count the size of all the files in the input to estimate the size of the
   1012   // output.
   1013   for (int i = 0; files[i] != NULL; i++) {
   1014     if (stat(files[i], &statst) != 0) {
   1015       fprintf(stderr, "File %s does not seem to exist.", files[i]);
   1016       return 0;
   1017     }
   1018     size += statst.st_size;
   1019     // Add sizes of Zip meta data
   1020     // local file header = 30 bytes
   1021     // data descriptor = 12 bytes
   1022     // central directory descriptor = 46 bytes
   1023     //    Total: 88bytes
   1024     size += 88;
   1025     // The filename is stored twice (once in the central directory
   1026     // and once in the local file header).
   1027     size += strlen(files[i]) * 2;
   1028   }
   1029   return size;
   1030 }
   1031 
   1032 }  // namespace devtools_ijar
   1033