// Copyright 2007 Alan Donovan. All rights reserved.
//
// Author: Alan Donovan <adonovan (at) google.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// zip.cc -- .zip (.jar) file reading/writing routines.
//

// See README.txt for details.
//
// See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
// for definition of PKZIP file format.

#define _FILE_OFFSET_BITS 64  // Support zip files larger than 2GB

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <limits>
#include <vector>

#include "zip.h"
#include <zlib.h>

#define LOCAL_FILE_HEADER_SIGNATURE           0x04034b50
#define CENTRAL_FILE_HEADER_SIGNATURE         0x02014b50
#define END_OF_CENTRAL_DIR_SIGNATURE          0x06054b50
#define DATA_DESCRIPTOR_SIGNATURE             0x08074b50

// version to extract: 1.0 - default value from APPNOTE.TXT.
// Output JAR files contain no extra ZIP features, so this is enough.
51 #define ZIP_VERSION_TO_EXTRACT 10 52 #define COMPRESSION_METHOD_STORED 0 // no compression 53 #define COMPRESSION_METHOD_DEFLATED 8 54 55 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3) 56 #define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11) 57 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1)) 58 #define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \ 59 (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \ 60 | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \ 61 | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED) 62 63 namespace devtools_ijar { 64 // In the absence of ZIP64 support, zip files are limited to 4GB. 65 // http://www.info-zip.org/FAQ.html#limits 66 static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max(); 67 68 static bool ProcessCentralDirEntry(const u1 *&p, 69 size_t *compressed_size, 70 size_t *uncompressed_size, 71 char *filename, 72 size_t filename_size, 73 u4 *attr, 74 u4 *offset); 75 76 // 77 // A class representing a ZipFile for reading. Its public API is exposed 78 // using the ZipExtractor abstract class. 79 // 80 class InputZipFile : public ZipExtractor { 81 public: 82 InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length, 83 off_t in_offset, const u1* zipdata_in, const u1* central_dir); 84 virtual ~InputZipFile(); 85 86 virtual const char* GetError() { 87 if (errmsg[0] == 0) { 88 return NULL; 89 } 90 return errmsg; 91 } 92 93 virtual bool ProcessNext(); 94 virtual void Reset(); 95 virtual size_t GetSize() { 96 return in_length_; 97 } 98 99 virtual u8 CalculateOutputLength(); 100 101 private: 102 ZipExtractorProcessor *processor; 103 104 int fd_in; // Input file descripor 105 106 // InputZipFile is responsible for maintaining the following 107 // pointers. They are allocated by the Create() method before 108 // the object is actually created using mmap. 
109 const u1 * const zipdata_in_; // start of input file mmap 110 const u1 * zipdata_in_mapped_; // start of still mapped region 111 const u1 * const central_dir_; // central directory in input file 112 113 size_t in_length_; // size of the input file 114 size_t in_offset_; // offset the input file 115 116 const u1 *p; // input cursor 117 118 const u1* central_dir_current_; // central dir input cursor 119 120 // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every 121 // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is 122 // not enough, we bail out. We only decompress class files, so they should 123 // be smaller than 64K anyway, but we give a little leeway. 124 // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the 125 // ZIP. It is set to 128M here so we can uncompress the Bazel server with 126 // this library. 127 static const size_t INITIAL_BUFFER_SIZE = 256 * 1024; // 256K 128 static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024; 129 static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024; 130 131 // These metadata fields are the fields of the ZIP header of the file being 132 // processed. 133 u2 extract_version_; 134 u2 general_purpose_bit_flag_; 135 u2 compression_method_; 136 u4 uncompressed_size_; 137 u4 compressed_size_; 138 u2 file_name_length_; 139 u2 extra_field_length_; 140 const u1 *file_name_; 141 const u1 *extra_field_; 142 143 // Administration of memory reserved for decompressed data. We use the same 144 // buffer for each file to avoid some malloc()/free() calls and free the 145 // memory only in the dtor. C-style memory management is used so that we 146 // can call realloc. 147 u1 *uncompressed_data_; 148 size_t uncompressed_data_allocated_; 149 150 // Copy of the last filename entry - Null-terminated. 151 char filename[PATH_MAX]; 152 // The external file attribute field 153 u4 attr; 154 155 // last error 156 char errmsg[4*PATH_MAX]; 157 158 int error(const char *fmt, ...) 
{ 159 va_list ap; 160 va_start(ap, fmt); 161 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); 162 va_end(ap); 163 return -1; 164 } 165 166 // Check that at least n bytes remain in the input file, otherwise 167 // abort with an error message. "state" is the name of the field 168 // we're about to read, for diagnostics. 169 int EnsureRemaining(size_t n, const char *state) { 170 size_t in_offset = p - zipdata_in_; 171 size_t remaining = in_length_ - in_offset; 172 if (n > remaining) { 173 return error("Premature end of file (at offset %zd, state=%s); " 174 "expected %zd more bytes but found %zd.\n", 175 in_offset, state, n, remaining); 176 } 177 return 0; 178 } 179 180 // Read one entry from input zip file 181 int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size); 182 183 // Uncompress a file from the archive using zlib. The pointer returned 184 // is owned by InputZipFile, so it must not be freed. Advances the input 185 // cursor to the first byte after the compressed data. 186 u1* UncompressFile(); 187 188 // Skip a file 189 int SkipFile(const bool compressed); 190 191 // Process a file 192 int ProcessFile(const bool compressed); 193 }; 194 195 // 196 // A class implementing ZipBuilder that represent an open zip file for writing. 
197 // 198 class OutputZipFile : public ZipBuilder { 199 public: 200 OutputZipFile(int fd, u1 * const zipdata_out) : 201 fd_out(fd), 202 zipdata_out_(zipdata_out), 203 q(zipdata_out) { 204 errmsg[0] = 0; 205 } 206 207 virtual const char* GetError() { 208 if (errmsg[0] == 0) { 209 return NULL; 210 } 211 return errmsg; 212 } 213 214 virtual ~OutputZipFile() { Finish(); } 215 virtual u1* NewFile(const char* filename, const u4 attr); 216 virtual int FinishFile(size_t filelength, bool compress = false, 217 bool compute_crc = false); 218 virtual int WriteEmptyFile(const char *filename); 219 virtual size_t GetSize() { 220 return Offset(q); 221 } 222 virtual int GetNumberFiles() { 223 return entries_.size(); 224 } 225 virtual int Finish(); 226 227 private: 228 struct LocalFileEntry { 229 // Start of the local header (in the output buffer). 230 size_t local_header_offset; 231 232 // Sizes of the file entry 233 size_t uncompressed_length; 234 size_t compressed_length; 235 236 // Compression method 237 u2 compression_method; 238 239 // CRC32 240 u4 crc32; 241 242 // external attributes field 243 u4 external_attr; 244 245 // Start/length of the file_name in the local header. 246 u1 *file_name; 247 u2 file_name_length; 248 249 // Start/length of the extra_field in the local header. 250 const u1 *extra_field; 251 u2 extra_field_length; 252 }; 253 254 int fd_out; // file descriptor for the output file 255 256 // OutputZipFile is responsible for maintaining the following 257 // pointers. They are allocated by the Create() method before 258 // the object is actually created using mmap. 259 u1 * const zipdata_out_; // start of output file mmap 260 u1 *q; // output cursor 261 262 u1 *header_ptr; // Current pointer to "compression method" entry. 263 264 // List of entries to write the central directory 265 std::vector<LocalFileEntry*> entries_; 266 267 // last error 268 char errmsg[4*PATH_MAX]; 269 270 int error(const char *fmt, ...) 
{ 271 va_list ap; 272 va_start(ap, fmt); 273 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap); 274 va_end(ap); 275 return -1; 276 } 277 278 // Write the ZIP central directory structure for each local file 279 // entry in "entries". 280 void WriteCentralDirectory(); 281 282 // Returns the offset of the pointer relative to the start of the 283 // output zip file. 284 size_t Offset(const u1 *const x) { 285 return x - zipdata_out_; 286 } 287 288 // Write ZIP file header in the output. Since the compressed size is not 289 // known in advance, it must be recorded later. This method returns a pointer 290 // to "compressed size" in the file header that should be passed to 291 // WriteFileSizeInLocalFileHeader() later. 292 u1* WriteLocalFileHeader(const char *filename, const u4 attr); 293 294 // Fill in the "compressed size" and "uncompressed size" fields in a local 295 // file header previously written by WriteLocalFileHeader(). 296 size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, 297 size_t out_length, 298 bool compress = false, 299 const u4 crc = 0); 300 }; 301 302 // 303 // Implementation of InputZipFile 304 // 305 bool InputZipFile::ProcessNext() { 306 // Process the next entry in the central directory. Also make sure that the 307 // content pointer is in sync. 308 size_t compressed, uncompressed; 309 u4 offset; 310 if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed, 311 filename, PATH_MAX, &attr, &offset)) { 312 return false; 313 } 314 315 // There might be an offset specified in the central directory that does 316 // not match the file offset, if so, correct the pointer. 
317 if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) { 318 p = zipdata_in_ + offset; 319 } 320 321 if (EnsureRemaining(4, "signature") < 0) { 322 return false; 323 } 324 u4 signature = get_u4le(p); 325 if (signature == LOCAL_FILE_HEADER_SIGNATURE) { 326 if (ProcessLocalFileEntry(compressed, uncompressed) < 0) { 327 return false; 328 } 329 } else { 330 error("local file header signature for file %s not found\n", filename); 331 return false; 332 } 333 334 return true; 335 } 336 337 int InputZipFile::ProcessLocalFileEntry( 338 size_t compressed_size, size_t uncompressed_size) { 339 if (EnsureRemaining(26, "extract_version") < 0) { 340 return -1; 341 } 342 extract_version_ = get_u2le(p); 343 general_purpose_bit_flag_ = get_u2le(p); 344 345 if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) { 346 return error("Unsupported value (0x%04x) in general purpose bit flag.\n", 347 general_purpose_bit_flag_); 348 } 349 350 compression_method_ = get_u2le(p); 351 352 if (compression_method_ != COMPRESSION_METHOD_DEFLATED && 353 compression_method_ != COMPRESSION_METHOD_STORED) { 354 return error("Unsupported compression method (%d).\n", 355 compression_method_); 356 } 357 358 // skip over: last_mod_file_time, last_mod_file_date, crc32 359 p += 2 + 2 + 4; 360 compressed_size_ = get_u4le(p); 361 uncompressed_size_ = get_u4le(p); 362 file_name_length_ = get_u2le(p); 363 extra_field_length_ = get_u2le(p); 364 365 if (EnsureRemaining(file_name_length_, "file_name") < 0) { 366 return -1; 367 } 368 file_name_ = p; 369 p += file_name_length_; 370 371 if (EnsureRemaining(extra_field_length_, "extra_field") < 0) { 372 return -1; 373 } 374 extra_field_ = p; 375 p += extra_field_length_; 376 377 bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED; 378 379 // If the zip is compressed, compressed and uncompressed size members are 380 // zero in the local file header. 
If not, check that they are the same as the 381 // lengths from the central directory, otherwise, just believe the central 382 // directory 383 if (compressed_size_ == 0) { 384 compressed_size_ = compressed_size; 385 } else { 386 if (compressed_size_ != compressed_size) { 387 return error("central directory and file header inconsistent\n"); 388 } 389 } 390 391 if (uncompressed_size_ == 0) { 392 uncompressed_size_ = uncompressed_size; 393 } else { 394 if (uncompressed_size_ != uncompressed_size) { 395 return error("central directory and file header inconsistent\n"); 396 } 397 } 398 399 if (processor->Accept(filename, attr)) { 400 if (ProcessFile(is_compressed) < 0) { 401 return -1; 402 } 403 } else { 404 if (SkipFile(is_compressed) < 0) { 405 return -1; 406 } 407 } 408 409 if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) { 410 // Skip the data descriptor. Some implementations do not put the signature 411 // here, so check if the next 4 bytes are a signature, and if so, skip the 412 // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip 413 // the next 8 bytes (because the value just read was the CRC). 414 u4 signature = get_u4le(p); 415 if (signature == DATA_DESCRIPTOR_SIGNATURE) { 416 p += 4 * 3; 417 } else { 418 p += 4 * 2; 419 } 420 } 421 422 if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) { 423 munmap(const_cast<u1 *>(zipdata_in_mapped_), MAX_MAPPED_REGION); 424 zipdata_in_mapped_ += MAX_MAPPED_REGION; 425 } 426 427 return 0; 428 } 429 430 int InputZipFile::SkipFile(const bool compressed) { 431 if (!compressed) { 432 // In this case, compressed_size_ == uncompressed_size_ (since the file is 433 // uncompressed), so we can use either. 
434 if (compressed_size_ != uncompressed_size_) { 435 return error("compressed size != uncompressed size, although the file " 436 "is uncompressed.\n"); 437 } 438 } 439 440 if (EnsureRemaining(compressed_size_, "file_data") < 0) { 441 return -1; 442 } 443 p += compressed_size_; 444 return 0; 445 } 446 447 u1* InputZipFile::UncompressFile() { 448 size_t in_offset = p - zipdata_in_; 449 size_t remaining = in_length_ - in_offset; 450 z_stream stream; 451 452 stream.zalloc = Z_NULL; 453 stream.zfree = Z_NULL; 454 stream.opaque = Z_NULL; 455 stream.avail_in = remaining; 456 stream.next_in = (Bytef *) p; 457 458 int ret = inflateInit2(&stream, -MAX_WBITS); 459 if (ret != Z_OK) { 460 error("inflateInit: %d\n", ret); 461 return NULL; 462 } 463 464 int uncompressed_until_now = 0; 465 466 while (true) { 467 stream.avail_out = uncompressed_data_allocated_ - uncompressed_until_now; 468 stream.next_out = uncompressed_data_ + uncompressed_until_now; 469 int old_avail_out = stream.avail_out; 470 471 ret = inflate(&stream, Z_SYNC_FLUSH); 472 int uncompressed_now = old_avail_out - stream.avail_out; 473 uncompressed_until_now += uncompressed_now; 474 475 switch (ret) { 476 case Z_STREAM_END: { 477 // zlib said that there is no more data to decompress. 478 479 u1 *new_p = reinterpret_cast<u1*>(stream.next_in); 480 compressed_size_ = new_p - p; 481 uncompressed_size_ = uncompressed_until_now; 482 p = new_p; 483 inflateEnd(&stream); 484 return uncompressed_data_; 485 } 486 487 case Z_OK: { 488 // zlib said that there is no more room in the buffer allocated for 489 // the decompressed data. Enlarge that buffer and try again. 
490 491 if (uncompressed_data_allocated_ == MAX_BUFFER_SIZE) { 492 error("ijar does not support decompressing files " 493 "larger than %dMB.\n", 494 (int) (MAX_BUFFER_SIZE/(1024*1024))); 495 return NULL; 496 } 497 498 uncompressed_data_allocated_ *= 2; 499 if (uncompressed_data_allocated_ > MAX_BUFFER_SIZE) { 500 uncompressed_data_allocated_ = MAX_BUFFER_SIZE; 501 } 502 503 uncompressed_data_ = reinterpret_cast<u1*>( 504 realloc(uncompressed_data_, uncompressed_data_allocated_)); 505 break; 506 } 507 508 case Z_DATA_ERROR: 509 case Z_BUF_ERROR: 510 case Z_STREAM_ERROR: 511 case Z_NEED_DICT: 512 default: { 513 error("zlib returned error code %d during inflate.\n", ret); 514 return NULL; 515 } 516 } 517 } 518 } 519 520 int InputZipFile::ProcessFile(const bool compressed) { 521 const u1 *file_data; 522 if (compressed) { 523 file_data = UncompressFile(); 524 if (file_data == NULL) { 525 return -1; 526 } 527 } else { 528 // In this case, compressed_size_ == uncompressed_size_ (since the file is 529 // uncompressed), so we can use either. 530 if (compressed_size_ != uncompressed_size_) { 531 return error("compressed size != uncompressed size, although the file " 532 "is uncompressed.\n"); 533 } 534 535 if (EnsureRemaining(compressed_size_, "file_data") < 0) { 536 return -1; 537 } 538 file_data = p; 539 p += compressed_size_; 540 } 541 processor->Process(filename, attr, file_data, uncompressed_size_); 542 return 0; 543 } 544 545 546 // Reads and returns some metadata of the next file from the central directory: 547 // - compressed size 548 // - uncompressed size 549 // - whether the entry is a class file (to be included in the output). 550 // Precondition: p points to the beginning of an entry in the central dir 551 // Postcondition: p points to the beginning of the next entry in the central dir 552 // Returns true if the central directory contains another file and false if not. 553 // Of course, in the latter case, the size output variables are not changed. 
554 // Note that the central directory is always followed by another data structure 555 // that has a signature, so parsing it this way is safe. 556 static bool ProcessCentralDirEntry( 557 const u1 *&p, size_t *compressed_size, size_t *uncompressed_size, 558 char *filename, size_t filename_size, u4 *attr, u4 *offset) { 559 u4 signature = get_u4le(p); 560 if (signature != CENTRAL_FILE_HEADER_SIGNATURE) { 561 return false; 562 } 563 564 p += 16; // skip to 'compressed size' field 565 *compressed_size = get_u4le(p); 566 *uncompressed_size = get_u4le(p); 567 u2 file_name_length = get_u2le(p); 568 u2 extra_field_length = get_u2le(p); 569 u2 file_comment_length = get_u2le(p); 570 p += 4; // skip to external file attributes field 571 *attr = get_u4le(p); 572 *offset = get_u4le(p); 573 { 574 size_t len = (file_name_length < filename_size) 575 ? file_name_length 576 : (filename_size - 1); 577 memcpy(reinterpret_cast<void*>(filename), p, len); 578 filename[len] = 0; 579 } 580 p += file_name_length; 581 p += extra_field_length; 582 p += file_comment_length; 583 return true; 584 } 585 586 // Gives a maximum bound on the size of the interface JAR. Basically, adds 587 // the difference between the compressed and uncompressed sizes to the size 588 // of the input file. 
589 u8 InputZipFile::CalculateOutputLength() { 590 const u1* current = central_dir_; 591 592 u8 compressed_size = 0; 593 u8 uncompressed_size = 0; 594 u8 skipped_compressed_size = 0; 595 u4 attr; 596 u4 offset; 597 char filename[PATH_MAX]; 598 599 while (true) { 600 size_t file_compressed, file_uncompressed; 601 if (!ProcessCentralDirEntry(current, 602 &file_compressed, &file_uncompressed, 603 filename, PATH_MAX, &attr, &offset)) { 604 break; 605 } 606 607 if (processor->Accept(filename, attr)) { 608 compressed_size += (u8) file_compressed; 609 uncompressed_size += (u8) file_uncompressed; 610 } else { 611 skipped_compressed_size += file_compressed; 612 } 613 } 614 615 // The worst case is when the output is simply the input uncompressed. The 616 // metadata in the zip file will stay the same, so the file will grow by the 617 // difference between the compressed and uncompressed sizes. 618 return (u8) in_length_ - skipped_compressed_size 619 + (uncompressed_size - compressed_size); 620 } 621 622 // Given the data in the zip file, returns the offset of the central directory 623 // and the number of files contained in it. 624 bool FindZipCentralDirectory(const u1* bytes, size_t in_length, 625 u4* offset, const u1** central_dir) { 626 static const int MAX_COMMENT_LENGTH = 0xffff; 627 static const int CENTRAL_DIR_LOCATOR_SIZE = 22; 628 // Maximum distance of start of central dir locator from end of file 629 static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE; 630 const u1* last_pos_to_check = in_length < MAX_DELTA 631 ? 
bytes 632 : bytes + (in_length - MAX_DELTA); 633 const u1* current; 634 bool found = false; 635 636 for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE; 637 current >= last_pos_to_check; 638 current-- ) { 639 const u1* p = current; 640 if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) { 641 continue; 642 } 643 644 p += 16; // skip to comment length field 645 u2 comment_length = get_u2le(p); 646 647 // Does the comment go exactly till the end of the file? 648 if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE 649 != bytes + in_length) { 650 continue; 651 } 652 653 // Hooray, we found it! 654 found = true; 655 break; 656 } 657 658 if (!found) { 659 fprintf(stderr, "file is invalid or corrupted (missing end of central " 660 "directory record)\n"); 661 return false; 662 } 663 664 const u1* end_of_central_dir = current; 665 get_u4le(current); // central directory locator signature, already checked 666 u2 number_of_this_disk = get_u2le(current); 667 u2 disk_with_central_dir = get_u2le(current); 668 u2 central_dir_entries_on_this_disk = get_u2le(current); 669 u2 central_dir_entries = get_u2le(current); 670 u4 central_dir_size = get_u4le(current); 671 u4 central_dir_offset = get_u4le(current); 672 u2 file_comment_length = get_u2le(current); 673 current += file_comment_length; // set current to the end of the central dir 674 675 if (number_of_this_disk != 0 676 || disk_with_central_dir != 0 677 || central_dir_entries_on_this_disk != central_dir_entries) { 678 fprintf(stderr, "multi-disk JAR files are not supported\n"); 679 return false; 680 } 681 682 // Do not change output values before determining that they are OK. 683 *offset = central_dir_offset; 684 // Central directory start can then be used to determine the actual 685 // starts of the zip file (which can be different in case of a non-zip 686 // header like for auto-extractable binaries). 
687 *central_dir = end_of_central_dir - central_dir_size; 688 return true; 689 } 690 691 void InputZipFile::Reset() { 692 central_dir_current_ = central_dir_; 693 zipdata_in_mapped_ = zipdata_in_; 694 p = zipdata_in_ + in_offset_; 695 } 696 697 int ZipExtractor::ProcessAll() { 698 while (ProcessNext()) {} 699 if (GetError() != NULL) { 700 return -1; 701 } 702 return 0; 703 } 704 705 ZipExtractor* ZipExtractor::Create(const char* filename, 706 ZipExtractorProcessor *processor) { 707 int fd_in = open(filename, O_RDONLY); 708 if (fd_in < 0) { 709 return NULL; 710 } 711 712 off_t length = lseek(fd_in, 0, SEEK_END); 713 if (length < 0) { 714 return NULL; 715 } 716 717 void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0); 718 if (zipdata_in == MAP_FAILED) { 719 return NULL; 720 } 721 722 u4 central_dir_offset; 723 const u1 *central_dir = NULL; 724 725 if (!devtools_ijar::FindZipCentralDirectory( 726 static_cast<const u1*>(zipdata_in), length, 727 ¢ral_dir_offset, ¢ral_dir)) { 728 errno = EIO; // we don't really have a good error number 729 return NULL; 730 } 731 const u1 *zipdata_start = static_cast<const u1*>(zipdata_in); 732 off_t offset = - static_cast<off_t>(zipdata_start 733 + central_dir_offset 734 - central_dir); 735 736 return new InputZipFile(processor, fd_in, length, offset, 737 zipdata_start, central_dir); 738 } 739 740 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd, 741 off_t in_length, off_t in_offset, 742 const u1* zipdata_in, const u1* central_dir) 743 : processor(processor), fd_in(fd), 744 zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in), 745 central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset), 746 p(zipdata_in + in_offset), central_dir_current_(central_dir) { 747 uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE; 748 uncompressed_data_ = 749 reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_)); 750 errmsg[0] = 0; 751 } 752 753 InputZipFile::~InputZipFile() { 754 
free(uncompressed_data_); 755 close(fd_in); 756 } 757 758 759 // 760 // Implementation of OutputZipFile 761 // 762 int OutputZipFile::WriteEmptyFile(const char *filename) { 763 const u1* file_name = (const u1*) filename; 764 size_t file_name_length = strlen(filename); 765 766 LocalFileEntry *entry = new LocalFileEntry; 767 entry->local_header_offset = Offset(q); 768 entry->external_attr = 0; 769 entry->crc32 = 0; 770 771 // Output the ZIP local_file_header: 772 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); 773 put_u2le(q, 10); // extract_version 774 put_u2le(q, 0); // general_purpose_bit_flag 775 put_u2le(q, 0); // compression_method 776 put_u2le(q, 0); // last_mod_file_time 777 put_u2le(q, 0); // last_mod_file_date 778 put_u4le(q, entry->crc32); // crc32 779 put_u4le(q, 0); // compressed_size 780 put_u4le(q, 0); // uncompressed_size 781 put_u2le(q, file_name_length); 782 put_u2le(q, 0); // extra_field_length 783 put_n(q, file_name, file_name_length); 784 785 entry->file_name_length = file_name_length; 786 entry->extra_field_length = 0; 787 entry->compressed_length = 0; 788 entry->uncompressed_length = 0; 789 entry->compression_method = 0; 790 entry->extra_field = (const u1 *)""; 791 entry->file_name = (u1*) strdup((const char *) file_name); 792 entries_.push_back(entry); 793 794 return 0; 795 } 796 797 void OutputZipFile::WriteCentralDirectory() { 798 // central directory: 799 const u1 *central_directory_start = q; 800 for (size_t ii = 0; ii < entries_.size(); ++ii) { 801 LocalFileEntry *entry = entries_[ii]; 802 put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE); 803 put_u2le(q, 0); // version made by 804 805 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract 806 put_u2le(q, 0); // general purpose bit flag 807 put_u2le(q, entry->compression_method); // compression method: 808 put_u2le(q, 0); // last_mod_file_time 809 put_u2le(q, 0); // last_mod_file_date 810 put_u4le(q, entry->crc32); // crc32 811 put_u4le(q, entry->compressed_length); // compressed_size 812 
put_u4le(q, entry->uncompressed_length); // uncompressed_size 813 put_u2le(q, entry->file_name_length); 814 put_u2le(q, entry->extra_field_length); 815 816 put_u2le(q, 0); // file comment length 817 put_u2le(q, 0); // disk number start 818 put_u2le(q, 0); // internal file attributes 819 put_u4le(q, entry->external_attr); // external file attributes 820 // relative offset of local header: 821 put_u4le(q, entry->local_header_offset); 822 823 put_n(q, entry->file_name, entry->file_name_length); 824 put_n(q, entry->extra_field, entry->extra_field_length); 825 } 826 u4 central_directory_size = q - central_directory_start; 827 828 put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE); 829 put_u2le(q, 0); // number of this disk 830 put_u2le(q, 0); // number of the disk with the start of the central directory 831 put_u2le(q, entries_.size()); // # central dir entries on this disk 832 put_u2le(q, entries_.size()); // total # entries in the central directory 833 put_u4le(q, central_directory_size); // size of the central directory 834 put_u4le(q, Offset(central_directory_start)); // offset of start of central 835 // directory wrt starting disk 836 put_u2le(q, 0); // .ZIP file comment length 837 } 838 839 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) { 840 off_t file_name_length_ = strlen(filename); 841 LocalFileEntry *entry = new LocalFileEntry; 842 entry->local_header_offset = Offset(q); 843 entry->file_name_length = file_name_length_; 844 entry->file_name = new u1[file_name_length_]; 845 entry->external_attr = attr; 846 memcpy(entry->file_name, filename, file_name_length_); 847 entry->extra_field_length = 0; 848 entry->extra_field = (const u1 *)""; 849 850 // Output the ZIP local_file_header: 851 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE); 852 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract 853 put_u2le(q, 0); // general purpose bit flag 854 u1 *header_ptr = q; 855 put_u2le(q, COMPRESSION_METHOD_STORED); // compression method = placeholder 856 
put_u2le(q, 0); // last_mod_file_time 857 put_u2le(q, 0); // last_mod_file_date 858 put_u4le(q, entry->crc32); // crc32 859 put_u4le(q, 0); // compressed_size = placeholder 860 put_u4le(q, 0); // uncompressed_size = placeholder 861 put_u2le(q, entry->file_name_length); 862 put_u2le(q, entry->extra_field_length); 863 864 put_n(q, entry->file_name, entry->file_name_length); 865 put_n(q, entry->extra_field, entry->extra_field_length); 866 entries_.push_back(entry); 867 868 return header_ptr; 869 } 870 871 // Try to compress a file entry in memory using the deflate algorithm. 872 // It will compress buf (of size length) unless the compressed size is bigger 873 // than the input size. The result will overwrite the content of buf and the 874 // final size is returned. 875 size_t TryDeflate(u1 *buf, size_t length) { 876 u1 *outbuf = reinterpret_cast<u1 *>(malloc(length)); 877 z_stream stream; 878 879 // Initialize the z_stream strcut for reading from buf and wrinting in outbuf. 880 stream.zalloc = Z_NULL; 881 stream.zfree = Z_NULL; 882 stream.opaque = Z_NULL; 883 stream.total_in = length; 884 stream.avail_in = length; 885 stream.total_out = length; 886 stream.avail_out = length; 887 stream.next_in = buf; 888 stream.next_out = outbuf; 889 890 // deflateInit2 negative windows size prevent the zlib wrapper to be used. 891 if (deflateInit2(&stream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 892 -MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) { 893 // Failure to compress => return the buffer uncompressed 894 free(outbuf); 895 return length; 896 } 897 898 if (deflate(&stream, Z_FINISH) == Z_STREAM_END) { 899 // Compression successful and fits in outbuf, let's copy the result in buf. 
900 length = stream.total_out; 901 memcpy(buf, outbuf, length); 902 } 903 904 deflateEnd(&stream); 905 free(outbuf); 906 907 // Return the length of the resulting buffer 908 return length; 909 } 910 911 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr, 912 size_t out_length, 913 bool compress, 914 const u4 crc) { 915 size_t compressed_size = out_length; 916 if (compress) { 917 compressed_size = TryDeflate(q, out_length); 918 } 919 // compression method 920 if (compressed_size < out_length) { 921 put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED); 922 } else { 923 put_u2le(header_ptr, COMPRESSION_METHOD_STORED); 924 } 925 header_ptr += 4; 926 put_u4le(header_ptr, crc); // crc32 927 put_u4le(header_ptr, compressed_size); // compressed_size 928 put_u4le(header_ptr, out_length); // uncompressed_size 929 return compressed_size; 930 } 931 932 int OutputZipFile::Finish() { 933 if (fd_out > 0) { 934 WriteCentralDirectory(); 935 if (ftruncate(fd_out, GetSize()) < 0) { 936 return error("ftruncate(fd_out, GetSize()): %s", strerror(errno)); 937 } 938 if (close(fd_out) < 0) { 939 return error("close(fd_out): %s", strerror(errno)); 940 } 941 fd_out = -1; 942 } 943 return 0; 944 } 945 946 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) { 947 header_ptr = WriteLocalFileHeader(filename, attr); 948 return q; 949 } 950 951 int OutputZipFile::FinishFile(size_t filelength, bool compress, 952 bool compute_crc) { 953 u4 crc = 0; 954 if (compute_crc) { 955 crc = crc32(crc, q, filelength); 956 } 957 size_t compressed_size = 958 WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress, crc); 959 entries_.back()->crc32 = crc; 960 entries_.back()->compressed_length = compressed_size; 961 entries_.back()->uncompressed_length = filelength; 962 if (compressed_size < filelength) { 963 entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED; 964 } else { 965 entries_.back()->compression_method = COMPRESSION_METHOD_STORED; 966 } 967 q += 
compressed_size; 968 return 0; 969 } 970 971 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) { 972 if (estimated_size > kMaximumOutputSize) { 973 fprintf(stderr, 974 "Uncompressed input jar has size %llu, " 975 "which exceeds the maximum supported output size %llu.\n" 976 "Assuming that ijar will be smaller and hoping for the best.\n", 977 estimated_size, kMaximumOutputSize); 978 estimated_size = kMaximumOutputSize; 979 } 980 981 int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644); 982 if (fd_out < 0) { 983 return NULL; 984 } 985 986 // Create mmap-able sparse file 987 if (ftruncate(fd_out, estimated_size) < 0) { 988 return NULL; 989 } 990 991 // Ensure that any buffer overflow in JarStripper will result in 992 // SIGSEGV or SIGBUS by over-allocating beyond the end of the file. 993 size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE), 994 (u8) std::numeric_limits<size_t>::max()); 995 996 void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE, 997 MAP_SHARED, fd_out, 0); 998 if (zipdata_out == MAP_FAILED) { 999 fprintf(stderr, "output_length=%llu\n", estimated_size); 1000 return NULL; 1001 } 1002 1003 return new OutputZipFile(fd_out, (u1*) zipdata_out); 1004 } 1005 1006 u8 ZipBuilder::EstimateSize(char **files) { 1007 struct stat statst; 1008 // Digital signature field size = 6, End of central directory = 22, Total = 28 1009 u8 size = 28; 1010 // Count the size of all the files in the input to estimate the size of the 1011 // output. 
1012 for (int i = 0; files[i] != NULL; i++) { 1013 if (stat(files[i], &statst) != 0) { 1014 fprintf(stderr, "File %s does not seem to exist.", files[i]); 1015 return 0; 1016 } 1017 size += statst.st_size; 1018 // Add sizes of Zip meta data 1019 // local file header = 30 bytes 1020 // data descriptor = 12 bytes 1021 // central directory descriptor = 46 bytes 1022 // Total: 88bytes 1023 size += 88; 1024 // The filename is stored twice (once in the central directory 1025 // and once in the local file header). 1026 size += strlen(files[i]) * 2; 1027 } 1028 return size; 1029 } 1030 1031 } // namespace devtools_ijar 1032