Home | History | Annotate | Download | only in zip
      1 /*
      2  * Copyright (C) 2015 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License
     15  */
     16 
     17 package java.util.zip;
     18 
     19 import java.io.ByteArrayOutputStream;
     20 import java.io.IOException;
     21 import java.io.RandomAccessFile;
     22 import java.nio.BufferOverflowException;
     23 import java.nio.BufferUnderflowException;
     24 import java.nio.ByteBuffer;
     25 import java.nio.ByteOrder;
     26 
     27 import static java.util.zip.ZipOutputStream.writeIntAsUint16;
     28 import static java.util.zip.ZipOutputStream.writeLongAsUint32;
     29 import static java.util.zip.ZipOutputStream.writeLongAsUint64;
     30 
     31 /**
     32  * @hide
     33  */
     34 public class Zip64 {
     35 
     36     /* Non instantiable */
     37     private Zip64() {}
     38 
     39     /**
     40      * The maximum supported entry / archive size for standard (non zip64) entries and archives.
     41      *
     42      * @hide
     43      */
     44     public static final long MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE = 0x00000000ffffffffL;
     45 
     46     /**
     47      * The header ID of the zip64 extended info header. This value is used to identify
     48      * zip64 data in the "extra" field in the file headers.
     49      */
     50     private static final short ZIP64_EXTENDED_INFO_HEADER_ID = 0x0001;
     51 
     52 
     53     /*
     54      * Size (in bytes) of the zip64 end of central directory locator. This will be located
     55      * immediately before the end of central directory record if a given zipfile is in the
     56      * zip64 format.
     57      */
     58     private static final int ZIP64_LOCATOR_SIZE = 20;
     59 
     60     /**
     61      * The zip64 end of central directory locator signature (4 bytes wide).
     62      */
     63     private static final int ZIP64_LOCATOR_SIGNATURE = 0x07064b50;
     64 
     65     /**
     66      * The zip64 end of central directory record singature (4 bytes wide).
     67      */
     68     private static final int ZIP64_EOCD_RECORD_SIGNATURE = 0x06064b50;
     69 
     70     /**
     71      * The "effective" size of the zip64 eocd record. This excludes the fields that
     72      * are proprietary, signature, or fields we aren't interested in. We include the
     73      * following (contiguous) fields in this calculation :
     74      * - disk number (4 bytes)
     75      * - disk with start of central directory (4 bytes)
     76      * - number of central directory entries on this disk (8 bytes)
     77      * - total number of central directory entries (8 bytes)
     78      * - size of the central directory (8 bytes)
     79      * - offset of the start of the central directory (8 bytes)
     80      */
     81     private static final int ZIP64_EOCD_RECORD_EFFECTIVE_SIZE = 40;
     82 
     83     /**
     84      * Parses the zip64 end of central directory record locator. The locator
     85      * must be placed immediately before the end of central directory (eocd) record
     86      * starting at {@code eocdOffset}.
     87      *
     88      * The position of the file cursor for {@code raf} after a call to this method
     89      * is undefined an callers must reposition it after each call to this method.
     90      */
     91     public static long parseZip64EocdRecordLocator(RandomAccessFile raf, long eocdOffset)
     92             throws IOException {
     93         // The spec stays curiously silent about whether a zip file with an EOCD record,
     94         // a zip64 locator and a zip64 eocd record is considered "empty". In our implementation,
     95         // we parse all records and read the counts from them instead of drawing any size or
     96         // layout based information.
     97         if (eocdOffset > ZIP64_LOCATOR_SIZE) {
     98             raf.seek(eocdOffset - ZIP64_LOCATOR_SIZE);
     99             if (Integer.reverseBytes(raf.readInt()) == ZIP64_LOCATOR_SIGNATURE) {
    100                 byte[] zip64EocdLocator = new byte[ZIP64_LOCATOR_SIZE  - 4];
    101                 raf.readFully(zip64EocdLocator);
    102                 ByteBuffer buf = ByteBuffer.wrap(zip64EocdLocator).order(ByteOrder.LITTLE_ENDIAN);
    103 
    104                 final int diskWithCentralDir = buf.getInt();
    105                 final long zip64EocdRecordOffset = buf.getLong();
    106                 final int numDisks = buf.getInt();
    107 
    108                 if (numDisks != 1 || diskWithCentralDir != 0) {
    109                     throw new ZipException("Spanned archives not supported");
    110                 }
    111 
    112                 return zip64EocdRecordOffset;
    113             }
    114         }
    115 
    116         return -1;
    117     }
    118 
    119     public static ZipFile.EocdRecord parseZip64EocdRecord(RandomAccessFile raf,
    120             long eocdRecordOffset, int commentLength) throws IOException {
    121         raf.seek(eocdRecordOffset);
    122         final int signature = Integer.reverseBytes(raf.readInt());
    123         if (signature != ZIP64_EOCD_RECORD_SIGNATURE) {
    124             throw new ZipException("Invalid zip64 eocd record offset, sig="
    125                     + Integer.toHexString(signature) + " offset=" + eocdRecordOffset);
    126         }
    127 
    128         // The zip64 eocd record specifies its own size as an 8 byte integral type. It is variable
    129         // length because of the "zip64 extensible data sector" but that field is reserved for
    130         // pkware's proprietary use. We therefore disregard it altogether and treat the end of
    131         // central directory structure as fixed length.
    132         //
    133         // We also skip "version made by" (2 bytes) and "version needed to extract" (2 bytes)
    134         // fields. We perform additional validation at the ZipEntry level, where applicable.
    135         //
    136         // That's a total of 12 bytes to skip
    137         raf.skipBytes(12);
    138 
    139         byte[] zip64Eocd = new byte[ZIP64_EOCD_RECORD_EFFECTIVE_SIZE];
    140         raf.readFully(zip64Eocd);
    141 
    142         ByteBuffer buf = ByteBuffer.wrap(zip64Eocd).order(ByteOrder.LITTLE_ENDIAN);
    143         try {
    144             int diskNumber = buf.getInt();
    145             int diskWithCentralDirStart = buf.getInt();
    146             long numEntries = buf.getLong();
    147             long totalNumEntries = buf.getLong();
    148             buf.getLong(); // Ignore the size of the central directory
    149             long centralDirOffset = buf.getLong();
    150 
    151             if (numEntries != totalNumEntries || diskNumber != 0 || diskWithCentralDirStart != 0) {
    152                 throw new ZipException("Spanned archives not supported :" +
    153                         " numEntries=" + numEntries + ", totalNumEntries=" + totalNumEntries +
    154                         ", diskNumber=" + diskNumber + ", diskWithCentralDirStart=" +
    155                         diskWithCentralDirStart);
    156             }
    157 
    158             return new ZipFile.EocdRecord(numEntries, centralDirOffset, commentLength);
    159         } catch (BufferUnderflowException bue) {
    160             ZipException zipException = new ZipException("Error parsing zip64 eocd record.");
    161             zipException.initCause(bue);
    162             throw zipException;
    163         }
    164     }
    165 
    166     /**
    167      * Parse the zip64 extended info record from the extras present in {@code ze}.
    168      *
    169      * If {@code fromCentralDirectory} is true, we assume we're parsing a central directory
    170      * record. We assume a local file header otherwise. The difference between the two is that
    171      * a central directory entry is required to be complete, whereas a local file header isn't.
    172      * This is due to the presence of an optional data descriptor after the file content.
    173      *
    174      * @return {@code} true iff. a zip64 extended info record was found.
    175      */
    176     public static boolean parseZip64ExtendedInfo(ZipEntry ze, boolean fromCentralDirectory)
    177             throws ZipException {
    178         int extendedInfoSize = -1;
    179         int extendedInfoStart = -1;
    180         // If this file contains a zip64 central directory locator, entries might
    181         // optionally contain a zip64 extended information extra entry.
    182         if (ze.extra != null && ze.extra.length > 0) {
    183             // Extensible data fields are of the form header1+data1 + header2+data2 and so
    184             // on, where each header consists of a 2 byte header ID followed by a 2 byte size.
    185             // We need to iterate through the entire list of headers to find the header ID
    186             // for the zip64 extended information extra field (0x0001).
    187             final ByteBuffer buf = ByteBuffer.wrap(ze.extra).order(ByteOrder.LITTLE_ENDIAN);
    188             extendedInfoSize = getZip64ExtendedInfoSize(buf);
    189             if (extendedInfoSize != -1) {
    190                 extendedInfoStart = buf.position();
    191                 try {
    192                     // The size & compressed size only make sense in the central directory *or* if
    193                     // we know them beforehand. If we don't know them beforehand, they're stored in
    194                     // the data descriptor and should be read from there.
    195                     //
    196                     // Note that the spec says that the local file header "MUST" contain the
    197                     // original and compressed size fields. We don't care too much about that.
    198                     // The spec claims that the order of fields is fixed anyway.
    199                     if (fromCentralDirectory || (ze.getMethod() == ZipEntry.STORED)) {
    200                         if (ze.size == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
    201                             ze.size = buf.getLong();
    202                         }
    203 
    204                         if (ze.compressedSize == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
    205                             ze.compressedSize = buf.getLong();
    206                         }
    207                     }
    208 
    209                     // The local header offset is significant only in the central directory. It makes no
    210                     // sense within the local header itself.
    211                     if (fromCentralDirectory) {
    212                         if (ze.localHeaderRelOffset == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
    213                             ze.localHeaderRelOffset = buf.getLong();
    214                         }
    215                     }
    216                 } catch (BufferUnderflowException bue) {
    217                     ZipException zipException = new ZipException("Error parsing extended info");
    218                     zipException.initCause(bue);
    219                     throw zipException;
    220                 }
    221             }
    222         }
    223 
    224         // This entry doesn't contain a zip64 extended information data entry header.
    225         // We have to check that the compressedSize / size / localHeaderRelOffset values
    226         // are valid and don't require the presence of the extended header.
    227         if (extendedInfoSize == -1) {
    228             if (ze.compressedSize == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE ||
    229                     ze.size == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE ||
    230                     ze.localHeaderRelOffset == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
    231                 throw new ZipException("File contains no zip64 extended information: "
    232                         + "name=" + ze.name + "compressedSize=" + ze.compressedSize + ", size="
    233                         + ze.size + ", localHeader=" + ze.localHeaderRelOffset);
    234             }
    235 
    236             return false;
    237         } else {
    238             // If we're parsed the zip64 extended info header, we remove it from the extras
    239             // so that applications that set their own extras will see the data they set.
    240 
    241             // This is an unfortunate workaround needed due to a gap in the spec. The spec demands
    242             // that extras are present in the "extensible" format, which means that each extra field
    243             // must be prefixed with a header ID and a length. However, earlier versions of the spec
    244             // made no mention of this, nor did any existing API enforce it. This means users could
    245             // set "free form" extras without caring very much whether the implementation wanted to
    246             // extend or add to them.
    247 
    248             // The start of the extended info header.
    249             final int extendedInfoHeaderStart = extendedInfoStart - 4;
    250             // The total size of the extended info, including the header.
    251             final int extendedInfoTotalSize = extendedInfoSize + 4;
    252 
    253             final int extrasLen = ze.extra.length - extendedInfoTotalSize;
    254             byte[] extrasWithoutZip64 = new byte[extrasLen];
    255 
    256             System.arraycopy(ze.extra, 0, extrasWithoutZip64, 0, extendedInfoHeaderStart);
    257             System.arraycopy(ze.extra, extendedInfoHeaderStart + extendedInfoTotalSize,
    258                     extrasWithoutZip64, extendedInfoHeaderStart, (extrasLen - extendedInfoHeaderStart));
    259 
    260             ze.extra = extrasWithoutZip64;
    261             return true;
    262         }
    263     }
    264 
    265     /**
    266      * Appends a zip64 extended info record to the extras contained in {@code ze}. If {@code ze}
    267      * contains no extras, a new extras array is created.
    268      */
    269     public static void insertZip64ExtendedInfoToExtras(ZipEntry ze) throws ZipException {
    270         final byte[] output;
    271         // We always write the size, uncompressed size and local rel header offset in all our
    272         // Zip64 extended info headers (in both the local file header as well as the central
    273         // directory). We always omit the disk number because we don't support spanned
    274         // archives anyway.
    275         //
    276         //  2 bytes : Zip64 Extended Info Header ID
    277         //  2 bytes : Zip64 Extended Info Field Size.
    278         //  8 bytes : Uncompressed size
    279         //  8 bytes : Compressed size
    280         //  8 bytes : Local header rel offset.
    281         // ----------
    282         // 28 bytes : total
    283         final int extendedInfoSize = 28;
    284 
    285         if (ze.extra == null) {
    286             output = new byte[extendedInfoSize];
    287         } else {
    288             // If the existing extras are already too big, we have no choice but to throw
    289             // an error.
    290             if (ze.extra.length + extendedInfoSize > 65535) {
    291                 throw new ZipException("No space in extras for zip64 extended entry info");
    292             }
    293 
    294             // We copy existing extras over and put the zip64 extended info at the beginning. This
    295             // is to avoid breakages in the presence of "old style" extras which don't contain
    296             // headers and lengths. The spec is again silent about these inconsistencies.
    297             //
    298             // This means that people that for ZipOutputStream users, the value ZipEntry.getExtra
    299             // after an entry is written will be different from before. This shouldn't be an issue
    300             // in practice.
    301             output = new byte[ze.extra.length + extendedInfoSize];
    302             System.arraycopy(ze.extra, 0, output,  extendedInfoSize, ze.extra.length);
    303         }
    304 
    305         ByteBuffer bb = ByteBuffer.wrap(output).order(ByteOrder.LITTLE_ENDIAN);
    306         bb.putShort(ZIP64_EXTENDED_INFO_HEADER_ID);
    307         // We subtract four because extendedInfoSize includes the ID and field
    308         // size itself.
    309         bb.putShort((short) (extendedInfoSize - 4));
    310 
    311         if (ze.getMethod() == ZipEntry.STORED) {
    312             bb.putLong(ze.size);
    313             bb.putLong(ze.compressedSize);
    314         } else {
    315             // Store these fields in the data descriptor instead.
    316             bb.putLong(0); // size.
    317             bb.putLong(0); // compressed size.
    318         }
    319 
    320         // The offset is only relevant in the central directory entry, but we write it out here
    321         // anyway, since we know what it is.
    322         bb.putLong(ze.localHeaderRelOffset);
    323 
    324         ze.extra = output;
    325     }
    326 
    327     /**
    328      * Returns the size of the extended info record if {@code extras} contains a zip64 extended info
    329      * record, {@code -1} otherwise. The buffer will be positioned at the start of the extended info
    330      * record.
    331      */
    332     private static int getZip64ExtendedInfoSize(ByteBuffer extras) {
    333         try {
    334             while (extras.hasRemaining()) {
    335                 final int headerId = extras.getShort() & 0xffff;
    336                 final int length = extras.getShort() & 0xffff;
    337                 if (headerId == ZIP64_EXTENDED_INFO_HEADER_ID) {
    338                     if (extras.remaining() >= length) {
    339                         return length;
    340                     } else {
    341                         return -1;
    342                     }
    343                 } else {
    344                     extras.position(extras.position() + length);
    345                 }
    346             }
    347 
    348             return -1;
    349         } catch (BufferUnderflowException bue) {
    350             // We'll underflow if we have an incomplete header in our extras.
    351             return -1;
    352         } catch (IllegalArgumentException iae) {
    353             // ByteBuffer.position() will throw if we have a truncated extra or
    354             // an invalid length in the header.
    355             return -1;
    356         }
    357     }
    358 
    359     /**
    360      * Copy the size, compressed size and local header offset fields from {@code ze} to
    361      * inside {@code ze}'s extended info record. This is additional step is necessary when
    362      * we could calculate the correct sizes only after writing out the entry. In this case,
    363      * the local file header would not contain real sizes, and they would be present in the
    364      * data descriptor and the central directory only.
    365      *
    366      * We choose the simplest strategy of always writing out the size, compressedSize and
    367      * local header offset in all our Zip64 Extended info records.
    368      */
    369     public static void refreshZip64ExtendedInfo(ZipEntry ze) {
    370         if (ze.extra == null) {
    371             throw new IllegalStateException("Zip64 entry has no available extras: " + ze);
    372         }
    373 
    374         ByteBuffer buf = ByteBuffer.wrap(ze.extra).order(ByteOrder.LITTLE_ENDIAN);
    375         final int extendedInfoSize = getZip64ExtendedInfoSize(buf);
    376         if (extendedInfoSize == -1) {
    377             throw new IllegalStateException(
    378                     "Zip64 entry extras has no zip64 extended info record: " + ze);
    379         }
    380 
    381         try {
    382             buf.putLong(ze.size);
    383             buf.putLong(ze.compressedSize);
    384             buf.putLong(ze.localHeaderRelOffset);
    385         } catch (BufferOverflowException boe) {
    386             throw new IllegalStateException("Invalid extended info extra", boe);
    387         }
    388     }
    389 
    390     public static void writeZip64EocdRecordAndLocator(ByteArrayOutputStream baos,
    391             long numEntries, long offset, long cDirSize) throws IOException {
    392         // Step 1: Write out the zip64 EOCD record.
    393         writeLongAsUint32(baos, ZIP64_EOCD_RECORD_SIGNATURE);
    394         // The size of the zip64 eocd record. This is the effective size + the
    395         // size of the "version made by" (2 bytes) and the "version needed to extract" (2 bytes)
    396         // fields.
    397         writeLongAsUint64(baos, ZIP64_EOCD_RECORD_EFFECTIVE_SIZE + 4);
    398         // TODO: What values should we put here ? The pre-zip64 values we've chosen don't
    399         // seem to make much sense either.
    400         writeIntAsUint16(baos, 20);
    401         writeIntAsUint16(baos, 20);
    402         writeLongAsUint32(baos, 0L); // number of disk
    403         writeLongAsUint32(baos, 0L); // number of disk with start of central dir.
    404         writeLongAsUint64(baos, numEntries); // number of entries in this disk.
    405         writeLongAsUint64(baos, numEntries); // number of entries in total.
    406         writeLongAsUint64(baos, cDirSize); // size of the central directory.
    407         writeLongAsUint64(baos, offset); // offset of the central directory wrt. this file.
    408 
    409         // Step 2: Write out the zip64 EOCD record locator.
    410         writeLongAsUint32(baos, ZIP64_LOCATOR_SIGNATURE);
    411         writeLongAsUint32(baos, 0); // number of disk with start of central dir.
    412         writeLongAsUint64(baos, offset + cDirSize); // offset of the eocd record wrt. this file.
    413         writeLongAsUint32(baos, 1); // total number of disks.
    414     }
    415 }
    416