/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

package java.util.zip;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import static java.util.zip.ZipOutputStream.writeIntAsUint16;
import static java.util.zip.ZipOutputStream.writeLongAsUint32;
import static java.util.zip.ZipOutputStream.writeLongAsUint64;

/**
 * Helpers for parsing and writing the zip64 records of a zip archive: the
 * zip64 end of central directory (EOCD) record and its locator, and the
 * per-entry zip64 extended information "extra" field.
 *
 * @hide
 */
public class Zip64 {

    /* Non instantiable */
    private Zip64() {}

    /**
     * The maximum supported entry / archive size for standard (non zip64) entries and archives.
     *
     * @hide
     */
    public static final long MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE = 0x00000000ffffffffL;

    /**
     * The header ID of the zip64 extended info header. This value is used to identify
     * zip64 data in the "extra" field in the file headers.
     */
    private static final short ZIP64_EXTENDED_INFO_HEADER_ID = 0x0001;

    /*
     * Size (in bytes) of the zip64 end of central directory locator. This will be located
     * immediately before the end of central directory record if a given zipfile is in the
     * zip64 format.
     */
    private static final int ZIP64_LOCATOR_SIZE = 20;

    /**
     * The zip64 end of central directory locator signature (4 bytes wide).
     */
    private static final int ZIP64_LOCATOR_SIGNATURE = 0x07064b50;

    /**
     * The zip64 end of central directory record signature (4 bytes wide).
     */
    private static final int ZIP64_EOCD_RECORD_SIGNATURE = 0x06064b50;

    /**
     * The "effective" size of the zip64 eocd record. This excludes the fields that
     * are proprietary, signature, or fields we aren't interested in. We include the
     * following (contiguous) fields in this calculation :
     * - disk number (4 bytes)
     * - disk with start of central directory (4 bytes)
     * - number of central directory entries on this disk (8 bytes)
     * - total number of central directory entries (8 bytes)
     * - size of the central directory (8 bytes)
     * - offset of the start of the central directory (8 bytes)
     */
    private static final int ZIP64_EOCD_RECORD_EFFECTIVE_SIZE = 40;

    /**
     * Parses the zip64 end of central directory record locator. The locator
     * must be placed immediately before the end of central directory (eocd) record
     * starting at {@code eocdOffset}.
     *
     * The position of the file cursor for {@code raf} after a call to this method
     * is undefined and callers must reposition it after each call to this method.
     *
     * @return the offset of the zip64 EOCD record within the file, or {@code -1}
     *         if no zip64 locator is present.
     * @throws ZipException if the archive spans multiple disks.
     */
    public static long parseZip64EocdRecordLocator(RandomAccessFile raf, long eocdOffset)
            throws IOException {
        // The spec stays curiously silent about whether a zip file with an EOCD record,
        // a zip64 locator and a zip64 eocd record is considered "empty". In our implementation,
        // we parse all records and read the counts from them instead of drawing any size or
        // layout based information.
        if (eocdOffset > ZIP64_LOCATOR_SIZE) {
            raf.seek(eocdOffset - ZIP64_LOCATOR_SIZE);
            if (Integer.reverseBytes(raf.readInt()) == ZIP64_LOCATOR_SIGNATURE) {
                // We've already consumed the 4 byte signature; read the remaining 16 bytes
                // of the locator.
                byte[] zip64EocdLocator = new byte[ZIP64_LOCATOR_SIZE - 4];
                raf.readFully(zip64EocdLocator);
                ByteBuffer buf = ByteBuffer.wrap(zip64EocdLocator).order(ByteOrder.LITTLE_ENDIAN);

                final int diskWithCentralDir = buf.getInt();
                final long zip64EocdRecordOffset = buf.getLong();
                final int numDisks = buf.getInt();

                if (numDisks != 1 || diskWithCentralDir != 0) {
                    throw new ZipException("Spanned archives not supported");
                }

                return zip64EocdRecordOffset;
            }
        }

        return -1;
    }

    /**
     * Parses the zip64 end of central directory record located at
     * {@code eocdRecordOffset} (as reported by the zip64 locator).
     *
     * The position of the file cursor for {@code raf} after a call to this method
     * is undefined and callers must reposition it after each call to this method.
     *
     * @throws ZipException if the signature at the given offset is wrong, if the
     *         record is truncated, or if the archive spans multiple disks.
     */
    public static ZipFile.EocdRecord parseZip64EocdRecord(RandomAccessFile raf,
            long eocdRecordOffset, int commentLength) throws IOException {
        raf.seek(eocdRecordOffset);
        final int signature = Integer.reverseBytes(raf.readInt());
        if (signature != ZIP64_EOCD_RECORD_SIGNATURE) {
            throw new ZipException("Invalid zip64 eocd record offset, sig="
                    + Integer.toHexString(signature) + " offset=" + eocdRecordOffset);
        }

        // The zip64 eocd record specifies its own size as an 8 byte integral type. It is variable
        // length because of the "zip64 extensible data sector" but that field is reserved for
        // pkware's proprietary use. We therefore disregard it altogether and treat the end of
        // central directory structure as fixed length.
        //
        // We also skip "version made by" (2 bytes) and "version needed to extract" (2 bytes)
        // fields. We perform additional validation at the ZipEntry level, where applicable.
        //
        // That's a total of 12 bytes to skip
        raf.skipBytes(12);

        byte[] zip64Eocd = new byte[ZIP64_EOCD_RECORD_EFFECTIVE_SIZE];
        raf.readFully(zip64Eocd);

        ByteBuffer buf = ByteBuffer.wrap(zip64Eocd).order(ByteOrder.LITTLE_ENDIAN);
        try {
            int diskNumber = buf.getInt();
            int diskWithCentralDirStart = buf.getInt();
            long numEntries = buf.getLong();
            long totalNumEntries = buf.getLong();
            buf.getLong(); // Ignore the size of the central directory
            long centralDirOffset = buf.getLong();

            if (numEntries != totalNumEntries || diskNumber != 0 || diskWithCentralDirStart != 0) {
                throw new ZipException("Spanned archives not supported :" +
                        " numEntries=" + numEntries + ", totalNumEntries=" + totalNumEntries +
                        ", diskNumber=" + diskNumber + ", diskWithCentralDirStart=" +
                        diskWithCentralDirStart);
            }

            return new ZipFile.EocdRecord(numEntries, centralDirOffset, commentLength);
        } catch (BufferUnderflowException bue) {
            ZipException zipException = new ZipException("Error parsing zip64 eocd record.");
            zipException.initCause(bue);
            throw zipException;
        }
    }

    /**
     * Parse the zip64 extended info record from the extras present in {@code ze}.
     *
     * If {@code fromCentralDirectory} is true, we assume we're parsing a central directory
     * record. We assume a local file header otherwise. The difference between the two is that
     * a central directory entry is required to be complete, whereas a local file header isn't.
     * This is due to the presence of an optional data descriptor after the file content.
     *
     * @return {@code} true iff. a zip64 extended info record was found.
     */
    public static boolean parseZip64ExtendedInfo(ZipEntry ze, boolean fromCentralDirectory)
            throws ZipException {
        int extendedInfoSize = -1;
        int extendedInfoStart = -1;
        // If this file contains a zip64 central directory locator, entries might
        // optionally contain a zip64 extended information extra entry.
        if (ze.extra != null && ze.extra.length > 0) {
            // Extensible data fields are of the form header1+data1 + header2+data2 and so
            // on, where each header consists of a 2 byte header ID followed by a 2 byte size.
            // We need to iterate through the entire list of headers to find the header ID
            // for the zip64 extended information extra field (0x0001).
            final ByteBuffer buf = ByteBuffer.wrap(ze.extra).order(ByteOrder.LITTLE_ENDIAN);
            extendedInfoSize = getZip64ExtendedInfoSize(buf);
            if (extendedInfoSize != -1) {
                extendedInfoStart = buf.position();
                try {
                    // The size & compressed size only make sense in the central directory *or* if
                    // we know them beforehand. If we don't know them beforehand, they're stored in
                    // the data descriptor and should be read from there.
                    //
                    // Note that the spec says that the local file header "MUST" contain the
                    // original and compressed size fields. We don't care too much about that.
                    // The spec claims that the order of fields is fixed anyway.
                    if (fromCentralDirectory || (ze.getMethod() == ZipEntry.STORED)) {
                        if (ze.size == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
                            ze.size = buf.getLong();
                        }

                        if (ze.compressedSize == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
                            ze.compressedSize = buf.getLong();
                        }
                    }

                    // The local header offset is significant only in the central directory. It makes no
                    // sense within the local header itself.
                    if (fromCentralDirectory) {
                        if (ze.localHeaderRelOffset == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
                            ze.localHeaderRelOffset = buf.getLong();
                        }
                    }
                } catch (BufferUnderflowException bue) {
                    ZipException zipException = new ZipException("Error parsing extended info");
                    zipException.initCause(bue);
                    throw zipException;
                }
            }
        }

        // This entry doesn't contain a zip64 extended information data entry header.
        // We have to check that the compressedSize / size / localHeaderRelOffset values
        // are valid and don't require the presence of the extended header.
        if (extendedInfoSize == -1) {
            if (ze.compressedSize == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE ||
                    ze.size == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE ||
                    ze.localHeaderRelOffset == MAX_ZIP_ENTRY_AND_ARCHIVE_SIZE) {
                throw new ZipException("File contains no zip64 extended information: "
                        + "name=" + ze.name + ", compressedSize=" + ze.compressedSize + ", size="
                        + ze.size + ", localHeader=" + ze.localHeaderRelOffset);
            }

            return false;
        } else {
            // If we've parsed the zip64 extended info header, we remove it from the extras
            // so that applications that set their own extras will see the data they set.

            // This is an unfortunate workaround needed due to a gap in the spec. The spec demands
            // that extras are present in the "extensible" format, which means that each extra field
            // must be prefixed with a header ID and a length. However, earlier versions of the spec
            // made no mention of this, nor did any existing API enforce it. This means users could
            // set "free form" extras without caring very much whether the implementation wanted to
            // extend or add to them.

            // The start of the extended info header.
            final int extendedInfoHeaderStart = extendedInfoStart - 4;
            // The total size of the extended info, including the header.
            final int extendedInfoTotalSize = extendedInfoSize + 4;

            final int extrasLen = ze.extra.length - extendedInfoTotalSize;
            byte[] extrasWithoutZip64 = new byte[extrasLen];

            System.arraycopy(ze.extra, 0, extrasWithoutZip64, 0, extendedInfoHeaderStart);
            System.arraycopy(ze.extra, extendedInfoHeaderStart + extendedInfoTotalSize,
                    extrasWithoutZip64, extendedInfoHeaderStart, (extrasLen - extendedInfoHeaderStart));

            ze.extra = extrasWithoutZip64;
            return true;
        }
    }

    /**
     * Appends a zip64 extended info record to the extras contained in {@code ze}. If {@code ze}
     * contains no extras, a new extras array is created.
     */
    public static void insertZip64ExtendedInfoToExtras(ZipEntry ze) throws ZipException {
        final byte[] output;
        // We always write the size, uncompressed size and local rel header offset in all our
        // Zip64 extended info headers (in both the local file header as well as the central
        // directory). We always omit the disk number because we don't support spanned
        // archives anyway.
        //
        //  2 bytes : Zip64 Extended Info Header ID
        //  2 bytes : Zip64 Extended Info Field Size.
        //  8 bytes : Uncompressed size
        //  8 bytes : Compressed size
        //  8 bytes : Local header rel offset.
        // ----------
        // 28 bytes : total
        final int extendedInfoSize = 28;

        if (ze.extra == null) {
            output = new byte[extendedInfoSize];
        } else {
            // If the existing extras are already too big, we have no choice but to throw
            // an error.
            if (ze.extra.length + extendedInfoSize > 65535) {
                throw new ZipException("No space in extras for zip64 extended entry info");
            }

            // We copy existing extras over and put the zip64 extended info at the beginning. This
            // is to avoid breakages in the presence of "old style" extras which don't contain
            // headers and lengths. The spec is again silent about these inconsistencies.
            //
            // This means that for ZipOutputStream users, the value of ZipEntry.getExtra
            // after an entry is written will be different from before. This shouldn't be an issue
            // in practice.
            output = new byte[ze.extra.length + extendedInfoSize];
            System.arraycopy(ze.extra, 0, output, extendedInfoSize, ze.extra.length);
        }

        ByteBuffer bb = ByteBuffer.wrap(output).order(ByteOrder.LITTLE_ENDIAN);
        bb.putShort(ZIP64_EXTENDED_INFO_HEADER_ID);
        // We subtract four because extendedInfoSize includes the ID and field
        // size itself.
        bb.putShort((short) (extendedInfoSize - 4));

        if (ze.getMethod() == ZipEntry.STORED) {
            bb.putLong(ze.size);
            bb.putLong(ze.compressedSize);
        } else {
            // Store these fields in the data descriptor instead.
            bb.putLong(0); // size.
            bb.putLong(0); // compressed size.
        }

        // The offset is only relevant in the central directory entry, but we write it out here
        // anyway, since we know what it is.
        bb.putLong(ze.localHeaderRelOffset);

        ze.extra = output;
    }

    /**
     * Returns the size of the extended info record if {@code extras} contains a zip64 extended info
     * record, {@code -1} otherwise. The buffer will be positioned at the start of the extended info
     * record.
     */
    private static int getZip64ExtendedInfoSize(ByteBuffer extras) {
        try {
            while (extras.hasRemaining()) {
                final int headerId = extras.getShort() & 0xffff;
                final int length = extras.getShort() & 0xffff;
                if (headerId == ZIP64_EXTENDED_INFO_HEADER_ID) {
                    if (extras.remaining() >= length) {
                        return length;
                    } else {
                        return -1;
                    }
                } else {
                    extras.position(extras.position() + length);
                }
            }

            return -1;
        } catch (BufferUnderflowException bue) {
            // We'll underflow if we have an incomplete header in our extras.
            return -1;
        } catch (IllegalArgumentException iae) {
            // ByteBuffer.position() will throw if we have a truncated extra or
            // an invalid length in the header.
            return -1;
        }
    }

    /**
     * Copy the size, compressed size and local header offset fields from {@code ze} to
     * inside {@code ze}'s extended info record. This additional step is necessary when
     * we could calculate the correct sizes only after writing out the entry. In this case,
     * the local file header would not contain real sizes, and they would be present in the
     * data descriptor and the central directory only.
     *
     * We choose the simplest strategy of always writing out the size, compressedSize and
     * local header offset in all our Zip64 Extended info records.
     */
    public static void refreshZip64ExtendedInfo(ZipEntry ze) {
        if (ze.extra == null) {
            throw new IllegalStateException("Zip64 entry has no available extras: " + ze);
        }

        ByteBuffer buf = ByteBuffer.wrap(ze.extra).order(ByteOrder.LITTLE_ENDIAN);
        final int extendedInfoSize = getZip64ExtendedInfoSize(buf);
        if (extendedInfoSize == -1) {
            throw new IllegalStateException(
                    "Zip64 entry extras has no zip64 extended info record: " + ze);
        }

        try {
            buf.putLong(ze.size);
            buf.putLong(ze.compressedSize);
            buf.putLong(ze.localHeaderRelOffset);
        } catch (BufferOverflowException boe) {
            throw new IllegalStateException("Invalid extended info extra", boe);
        }
    }

    /**
     * Writes the zip64 end of central directory record followed by the zip64
     * end of central directory locator to {@code baos}.
     *
     * @param numEntries the number of central directory entries (written as both the
     *        per-disk and total counts, since spanned archives aren't supported).
     * @param offset the offset of the start of the central directory within the file.
     * @param cDirSize the size (in bytes) of the central directory.
     */
    public static void writeZip64EocdRecordAndLocator(ByteArrayOutputStream baos,
            long numEntries, long offset, long cDirSize) throws IOException {
        // Step 1: Write out the zip64 EOCD record.
        writeLongAsUint32(baos, ZIP64_EOCD_RECORD_SIGNATURE);
        // The size of the zip64 eocd record. This is the effective size + the
        // size of the "version made by" (2 bytes) and the "version needed to extract" (2 bytes)
        // fields.
        writeLongAsUint64(baos, ZIP64_EOCD_RECORD_EFFECTIVE_SIZE + 4);
        // TODO: What values should we put here ? The pre-zip64 values we've chosen don't
        // seem to make much sense either.
        writeIntAsUint16(baos, 20);
        writeIntAsUint16(baos, 20);
        writeLongAsUint32(baos, 0L); // number of disk
        writeLongAsUint32(baos, 0L); // number of disk with start of central dir.
        writeLongAsUint64(baos, numEntries); // number of entries in this disk.
        writeLongAsUint64(baos, numEntries); // number of entries in total.
        writeLongAsUint64(baos, cDirSize); // size of the central directory.
        writeLongAsUint64(baos, offset); // offset of the central directory wrt. this file.

        // Step 2: Write out the zip64 EOCD record locator.
        writeLongAsUint32(baos, ZIP64_LOCATOR_SIGNATURE);
        writeLongAsUint32(baos, 0); // number of disk with start of central dir.
        writeLongAsUint64(baos, offset + cDirSize); // offset of the eocd record wrt. this file.
        writeLongAsUint32(baos, 1); // total number of disks.
    }
}