Home | History | Annotate | Download | only in archivers
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one
      3  * or more contributor license agreements.  See the NOTICE file
      4  * distributed with this work for additional information
      5  * regarding copyright ownership.  The ASF licenses this file
      6  * to you under the Apache License, Version 2.0 (the
      7  * "License"); you may not use this file except in compliance
      8  * with the License.  You may obtain a copy of the License at
      9  *
     10  * http://www.apache.org/licenses/LICENSE-2.0
     11  *
     12  * Unless required by applicable law or agreed to in writing,
     13  * software distributed under the License is distributed on an
     14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
     15  * KIND, either express or implied.  See the License for the
     16  * specific language governing permissions and limitations
     17  * under the License.
     18  */
     19 package org.apache.commons.compress.archivers;
     20 
     21 import java.io.ByteArrayInputStream;
     22 import java.io.IOException;
     23 import java.io.InputStream;
     24 import java.io.OutputStream;
     25 import java.security.AccessController;
     26 import java.security.PrivilegedAction;
     27 import java.util.ArrayList;
     28 import java.util.Collections;
     29 import java.util.Iterator;
     30 import java.util.Locale;
     31 import java.util.Set;
     32 import java.util.SortedMap;
     33 import java.util.TreeMap;
     34 
     35 import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
     36 import org.apache.commons.compress.archivers.ar.ArArchiveOutputStream;
     37 import org.apache.commons.compress.archivers.arj.ArjArchiveInputStream;
     38 import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
     39 import org.apache.commons.compress.archivers.cpio.CpioArchiveOutputStream;
     40 import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
     41 import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
     42 import org.apache.commons.compress.archivers.jar.JarArchiveOutputStream;
     43 import org.apache.commons.compress.archivers.sevenz.SevenZFile;
     44 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
     45 import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
     46 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
     47 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
     48 import org.apache.commons.compress.utils.IOUtils;
     49 import org.apache.commons.compress.utils.Lists;
     50 import org.apache.commons.compress.utils.ServiceLoaderIterator;
     51 import org.apache.commons.compress.utils.Sets;
     52 
     53 /**
     54  * Factory to create Archive[In|Out]putStreams from names or the first bytes of
     55  * the InputStream. In order to add other implementations, you should extend
     56  * ArchiveStreamFactory and override the appropriate methods (and call their
     57  * implementation from super of course).
     58  *
     59  * Compressing a ZIP-File:
     60  *
     61  * <pre>
     62  * final OutputStream out = Files.newOutputStream(output.toPath());
     63  * ArchiveOutputStream os = new ArchiveStreamFactory().createArchiveOutputStream(ArchiveStreamFactory.ZIP, out);
     64  *
     65  * os.putArchiveEntry(new ZipArchiveEntry("testdata/test1.xml"));
     66  * IOUtils.copy(Files.newInputStream(file1.toPath()), os);
     67  * os.closeArchiveEntry();
     68  *
     69  * os.putArchiveEntry(new ZipArchiveEntry("testdata/test2.xml"));
     70  * IOUtils.copy(Files.newInputStream(file2.toPath()), os);
     71  * os.closeArchiveEntry();
     72  * os.close();
     73  * </pre>
     74  *
     75  * Decompressing a ZIP-File:
     76  *
     77  * <pre>
     78  * final InputStream is = Files.newInputStream(input.toPath());
     79  * ArchiveInputStream in = new ArchiveStreamFactory().createArchiveInputStream(ArchiveStreamFactory.ZIP, is);
     80  * ZipArchiveEntry entry = (ZipArchiveEntry)in.getNextEntry();
     81  * OutputStream out = Files.newOutputStream(dir.toPath().resolve(entry.getName()));
     82  * IOUtils.copy(in, out);
     83  * out.close();
     84  * in.close();
     85  * </pre>
     86  * @Immutable provided that the deprecated method setEntryEncoding is not used.
     87  * @ThreadSafe even if the deprecated method setEntryEncoding is used
     88  */
     89 public class ArchiveStreamFactory implements ArchiveStreamProvider {
     90 
     91     private static final int TAR_HEADER_SIZE = 512;
     92 
     93     private static final int DUMP_SIGNATURE_SIZE = 32;
     94 
     95     private static final int SIGNATURE_SIZE = 12;
     96 
     97     private static final ArchiveStreamFactory SINGLETON = new ArchiveStreamFactory();
     98 
     99     /**
    100      * Constant (value {@value}) used to identify the AR archive format.
    101      * @since 1.1
    102      */
    103     public static final String AR = "ar";
    104 
    105     /**
    106      * Constant (value {@value}) used to identify the ARJ archive format.
    107      * Not supported as an output stream type.
    108      * @since 1.6
    109      */
    110     public static final String ARJ = "arj";
    111 
    112     /**
    113      * Constant (value {@value}) used to identify the CPIO archive format.
    114      * @since 1.1
    115      */
    116     public static final String CPIO = "cpio";
    117 
    118     /**
    119      * Constant (value {@value}) used to identify the Unix DUMP archive format.
    120      * Not supported as an output stream type.
    121      * @since 1.3
    122      */
    123     public static final String DUMP = "dump";
    124 
    125     /**
    126      * Constant (value {@value}) used to identify the JAR archive format.
    127      * @since 1.1
    128      */
    129     public static final String JAR = "jar";
    130 
    131     /**
    132      * Constant used to identify the TAR archive format.
    133      * @since 1.1
    134      */
    135     public static final String TAR = "tar";
    136 
    137     /**
    138      * Constant (value {@value}) used to identify the ZIP archive format.
    139      * @since 1.1
    140      */
    141     public static final String ZIP = "zip";
    142 
    143     /**
    144      * Constant (value {@value}) used to identify the 7z archive format.
    145      * @since 1.8
    146      */
    147     public static final String SEVEN_Z = "7z";
    148 
    149     /**
    150      * Entry encoding, null for the platform default.
    151      */
    152     private final String encoding;
    153 
    154     /**
    155      * Entry encoding, null for the default.
    156      */
    157     private volatile String entryEncoding;
    158 
    159     private SortedMap<String, ArchiveStreamProvider> archiveInputStreamProviders;
    160 
    161     private SortedMap<String, ArchiveStreamProvider> archiveOutputStreamProviders;
    162 
    163     private static ArrayList<ArchiveStreamProvider> findArchiveStreamProviders() {
    164         return Lists.newArrayList(serviceLoaderIterator());
    165     }
    166 
    167     static void putAll(Set<String> names, ArchiveStreamProvider provider,
    168             TreeMap<String, ArchiveStreamProvider> map) {
    169         for (String name : names) {
    170             map.put(toKey(name), provider);
    171         }
    172     }
    173 
    174     private static Iterator<ArchiveStreamProvider> serviceLoaderIterator() {
    175         return new ServiceLoaderIterator<>(ArchiveStreamProvider.class);
    176     }
    177 
    178     private static String toKey(final String name) {
    179         return name.toUpperCase(Locale.ROOT);
    180     }
    181 
    182     /**
    183      * Constructs a new sorted map from input stream provider names to provider
    184      * objects.
    185      *
    186      * <p>
    187      * The map returned by this method will have one entry for each provider for
    188      * which support is available in the current Java virtual machine. If two or
    189      * more supported provider have the same name then the resulting map will
    190      * contain just one of them; which one it will contain is not specified.
    191      * </p>
    192      *
    193      * <p>
    194      * The invocation of this method, and the subsequent use of the resulting
    195      * map, may cause time-consuming disk or network I/O operations to occur.
    196      * This method is provided for applications that need to enumerate all of
    197      * the available providers, for example to allow user provider selection.
    198      * </p>
    199      *
    200      * <p>
    201      * This method may return different results at different times if new
    202      * providers are dynamically made available to the current Java virtual
    203      * machine.
    204      * </p>
    205      *
    206      * @return An immutable, map from names to provider objects
    207      * @since 1.13
    208      */
    209     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveInputStreamProviders() {
    210         return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
    211             @Override
    212             public SortedMap<String, ArchiveStreamProvider> run() {
    213                 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
    214                 putAll(SINGLETON.getInputStreamArchiveNames(), SINGLETON, map);
    215                 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
    216                     putAll(provider.getInputStreamArchiveNames(), provider, map);
    217                 }
    218                 return map;
    219             }
    220         });
    221     }
    222 
    223     /**
    224      * Constructs a new sorted map from output stream provider names to provider
    225      * objects.
    226      *
    227      * <p>
    228      * The map returned by this method will have one entry for each provider for
    229      * which support is available in the current Java virtual machine. If two or
    230      * more supported provider have the same name then the resulting map will
    231      * contain just one of them; which one it will contain is not specified.
    232      * </p>
    233      *
    234      * <p>
    235      * The invocation of this method, and the subsequent use of the resulting
    236      * map, may cause time-consuming disk or network I/O operations to occur.
    237      * This method is provided for applications that need to enumerate all of
    238      * the available providers, for example to allow user provider selection.
    239      * </p>
    240      *
    241      * <p>
    242      * This method may return different results at different times if new
    243      * providers are dynamically made available to the current Java virtual
    244      * machine.
    245      * </p>
    246      *
    247      * @return An immutable, map from names to provider objects
    248      * @since 1.13
    249      */
    250     public static SortedMap<String, ArchiveStreamProvider> findAvailableArchiveOutputStreamProviders() {
    251         return AccessController.doPrivileged(new PrivilegedAction<SortedMap<String, ArchiveStreamProvider>>() {
    252             @Override
    253             public SortedMap<String, ArchiveStreamProvider> run() {
    254                 TreeMap<String, ArchiveStreamProvider> map = new TreeMap<>();
    255                 putAll(SINGLETON.getOutputStreamArchiveNames(), SINGLETON, map);
    256                 for (ArchiveStreamProvider provider : findArchiveStreamProviders()) {
    257                     putAll(provider.getOutputStreamArchiveNames(), provider, map);
    258                 }
    259                 return map;
    260             }
    261         });
    262     }
    263 
    264     /**
    265      * Create an instance using the platform default encoding.
    266      */
    267     public ArchiveStreamFactory() {
    268         this(null);
    269     }
    270 
    271     /**
    272      * Create an instance using the specified encoding.
    273      *
    274      * @param encoding the encoding to be used.
    275      *
    276      * @since 1.10
    277      */
    278     public ArchiveStreamFactory(final String encoding) {
    279         super();
    280         this.encoding = encoding;
    281         // Also set the original field so can continue to use it.
    282         this.entryEncoding = encoding;
    283     }
    284 
    285     /**
    286      * Returns the encoding to use for arj, jar, zip, dump, cpio and tar
    287      * files, or null for the archiver default.
    288      *
    289      * @return entry encoding, or null for the archiver default
    290      * @since 1.5
    291      */
    292     public String getEntryEncoding() {
    293         return entryEncoding;
    294     }
    295 
    296     /**
    297      * Sets the encoding to use for arj, jar, zip, dump, cpio and tar files. Use null for the archiver default.
    298      *
    299      * @param entryEncoding the entry encoding, null uses the archiver default.
    300      * @since 1.5
    301      * @deprecated 1.10 use {@link #ArchiveStreamFactory(String)} to specify the encoding
    302      * @throws IllegalStateException if the constructor {@link #ArchiveStreamFactory(String)}
    303      * was used to specify the factory encoding.
    304      */
    305     @Deprecated
    306     public void setEntryEncoding(final String entryEncoding) {
    307         // Note: this does not detect new ArchiveStreamFactory(null) but that does not set the encoding anyway
    308         if (encoding != null) {
    309             throw new IllegalStateException("Cannot overide encoding set by the constructor");
    310         }
    311         this.entryEncoding = entryEncoding;
    312     }
    313 
    314     /**
    315      * Creates an archive input stream from an archiver name and an input stream.
    316      *
    317      * @param archiverName the archive name,
    318      * i.e. {@value #AR}, {@value #ARJ}, {@value #ZIP}, {@value #TAR}, {@value #JAR}, {@value #CPIO}, {@value #DUMP} or {@value #SEVEN_Z}
    319      * @param in the input stream
    320      * @return the archive input stream
    321      * @throws ArchiveException if the archiver name is not known
    322      * @throws StreamingNotSupportedException if the format cannot be
    323      * read from a stream
    324      * @throws IllegalArgumentException if the archiver name or stream is null
    325      */
    326     public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in)
    327             throws ArchiveException {
    328         return createArchiveInputStream(archiverName, in, entryEncoding);
    329     }
    330 
    331     @Override
    332     public ArchiveInputStream createArchiveInputStream(final String archiverName, final InputStream in,
    333             final String actualEncoding) throws ArchiveException {
    334 
    335         if (archiverName == null) {
    336             throw new IllegalArgumentException("Archivername must not be null.");
    337         }
    338 
    339         if (in == null) {
    340             throw new IllegalArgumentException("InputStream must not be null.");
    341         }
    342 
    343         if (AR.equalsIgnoreCase(archiverName)) {
    344             return new ArArchiveInputStream(in);
    345         }
    346         if (ARJ.equalsIgnoreCase(archiverName)) {
    347             if (actualEncoding != null) {
    348                 return new ArjArchiveInputStream(in, actualEncoding);
    349             }
    350             return new ArjArchiveInputStream(in);
    351         }
    352         if (ZIP.equalsIgnoreCase(archiverName)) {
    353             if (actualEncoding != null) {
    354                 return new ZipArchiveInputStream(in, actualEncoding);
    355             }
    356             return new ZipArchiveInputStream(in);
    357         }
    358         if (TAR.equalsIgnoreCase(archiverName)) {
    359             if (actualEncoding != null) {
    360                 return new TarArchiveInputStream(in, actualEncoding);
    361             }
    362             return new TarArchiveInputStream(in);
    363         }
    364         if (JAR.equalsIgnoreCase(archiverName)) {
    365             if (actualEncoding != null) {
    366                 return new JarArchiveInputStream(in, actualEncoding);
    367             }
    368             return new JarArchiveInputStream(in);
    369         }
    370         if (CPIO.equalsIgnoreCase(archiverName)) {
    371             if (actualEncoding != null) {
    372                 return new CpioArchiveInputStream(in, actualEncoding);
    373             }
    374             return new CpioArchiveInputStream(in);
    375         }
    376         if (DUMP.equalsIgnoreCase(archiverName)) {
    377             if (actualEncoding != null) {
    378                 return new DumpArchiveInputStream(in, actualEncoding);
    379             }
    380             return new DumpArchiveInputStream(in);
    381         }
    382         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
    383             throw new StreamingNotSupportedException(SEVEN_Z);
    384         }
    385 
    386         final ArchiveStreamProvider archiveStreamProvider = getArchiveInputStreamProviders().get(toKey(archiverName));
    387         if (archiveStreamProvider != null) {
    388             return archiveStreamProvider.createArchiveInputStream(archiverName, in, actualEncoding);
    389         }
    390 
    391         throw new ArchiveException("Archiver: " + archiverName + " not found.");
    392     }
    393 
    394     /**
    395      * Creates an archive output stream from an archiver name and an output stream.
    396      *
    397      * @param archiverName the archive name,
    398      * i.e. {@value #AR}, {@value #ZIP}, {@value #TAR}, {@value #JAR} or {@value #CPIO}
    399      * @param out the output stream
    400      * @return the archive output stream
    401      * @throws ArchiveException if the archiver name is not known
    402      * @throws StreamingNotSupportedException if the format cannot be
    403      * written to a stream
    404      * @throws IllegalArgumentException if the archiver name or stream is null
    405      */
    406     public ArchiveOutputStream createArchiveOutputStream(final String archiverName, final OutputStream out)
    407             throws ArchiveException {
    408         return createArchiveOutputStream(archiverName, out, entryEncoding);
    409     }
    410 
    411     @Override
    412     public ArchiveOutputStream createArchiveOutputStream(
    413             final String archiverName, final OutputStream out, final String actualEncoding)
    414             throws ArchiveException {
    415         if (archiverName == null) {
    416             throw new IllegalArgumentException("Archivername must not be null.");
    417         }
    418         if (out == null) {
    419             throw new IllegalArgumentException("OutputStream must not be null.");
    420         }
    421 
    422         if (AR.equalsIgnoreCase(archiverName)) {
    423             return new ArArchiveOutputStream(out);
    424         }
    425         if (ZIP.equalsIgnoreCase(archiverName)) {
    426             final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(out);
    427             if (actualEncoding != null) {
    428                 zip.setEncoding(actualEncoding);
    429             }
    430             return zip;
    431         }
    432         if (TAR.equalsIgnoreCase(archiverName)) {
    433             if (actualEncoding != null) {
    434                 return new TarArchiveOutputStream(out, actualEncoding);
    435             }
    436             return new TarArchiveOutputStream(out);
    437         }
    438         if (JAR.equalsIgnoreCase(archiverName)) {
    439             if (actualEncoding != null) {
    440                 return new JarArchiveOutputStream(out, actualEncoding);
    441             }
    442             return new JarArchiveOutputStream(out);
    443         }
    444         if (CPIO.equalsIgnoreCase(archiverName)) {
    445             if (actualEncoding != null) {
    446                 return new CpioArchiveOutputStream(out, actualEncoding);
    447             }
    448             return new CpioArchiveOutputStream(out);
    449         }
    450         if (SEVEN_Z.equalsIgnoreCase(archiverName)) {
    451             throw new StreamingNotSupportedException(SEVEN_Z);
    452         }
    453 
    454         final ArchiveStreamProvider archiveStreamProvider = getArchiveOutputStreamProviders().get(toKey(archiverName));
    455         if (archiveStreamProvider != null) {
    456             return archiveStreamProvider.createArchiveOutputStream(archiverName, out, actualEncoding);
    457         }
    458 
    459         throw new ArchiveException("Archiver: " + archiverName + " not found.");
    460     }
    461 
    462     /**
    463      * Create an archive input stream from an input stream, autodetecting
    464      * the archive type from the first few bytes of the stream. The InputStream
    465      * must support marks, like BufferedInputStream.
    466      *
    467      * @param in the input stream
    468      * @return the archive input stream
    469      * @throws ArchiveException if the archiver name is not known
    470      * @throws StreamingNotSupportedException if the format cannot be
    471      * read from a stream
    472      * @throws IllegalArgumentException if the stream is null or does not support mark
    473      */
    474     public ArchiveInputStream createArchiveInputStream(final InputStream in)
    475             throws ArchiveException {
    476         return createArchiveInputStream(detect(in), in);
    477     }
    478 
    479     /**
    480      * Try to determine the type of Archiver
    481      * @param in input stream
    482      * @return type of archiver if found
    483      * @throws ArchiveException if an archiver cannot be detected in the stream
    484      * @since 1.14
    485      */
    486     public static String detect(InputStream in) throws ArchiveException {
    487         if (in == null) {
    488             throw new IllegalArgumentException("Stream must not be null.");
    489         }
    490 
    491         if (!in.markSupported()) {
    492             throw new IllegalArgumentException("Mark is not supported.");
    493         }
    494 
    495         final byte[] signature = new byte[SIGNATURE_SIZE];
    496         in.mark(signature.length);
    497         int signatureLength = -1;
    498         try {
    499             signatureLength = IOUtils.readFully(in, signature);
    500             in.reset();
    501         } catch (IOException e) {
    502             throw new ArchiveException("IOException while reading signature.", e);
    503         }
    504 
    505         if (ZipArchiveInputStream.matches(signature, signatureLength)) {
    506             return ZIP;
    507         } else if (JarArchiveInputStream.matches(signature, signatureLength)) {
    508             return JAR;
    509         } else if (ArArchiveInputStream.matches(signature, signatureLength)) {
    510             return AR;
    511         } else if (CpioArchiveInputStream.matches(signature, signatureLength)) {
    512             return CPIO;
    513         } else if (ArjArchiveInputStream.matches(signature, signatureLength)) {
    514             return ARJ;
    515         } else if (SevenZFile.matches(signature, signatureLength)) {
    516             return SEVEN_Z;
    517         }
    518 
    519         // Dump needs a bigger buffer to check the signature;
    520         final byte[] dumpsig = new byte[DUMP_SIGNATURE_SIZE];
    521         in.mark(dumpsig.length);
    522         try {
    523             signatureLength = IOUtils.readFully(in, dumpsig);
    524             in.reset();
    525         } catch (IOException e) {
    526             throw new ArchiveException("IOException while reading dump signature", e);
    527         }
    528         if (DumpArchiveInputStream.matches(dumpsig, signatureLength)) {
    529             return DUMP;
    530         }
    531 
    532         // Tar needs an even bigger buffer to check the signature; read the first block
    533         final byte[] tarHeader = new byte[TAR_HEADER_SIZE];
    534         in.mark(tarHeader.length);
    535         try {
    536             signatureLength = IOUtils.readFully(in, tarHeader);
    537             in.reset();
    538         } catch (IOException e) {
    539             throw new ArchiveException("IOException while reading tar signature", e);
    540         }
    541         if (TarArchiveInputStream.matches(tarHeader, signatureLength)) {
    542             return TAR;
    543         }
    544 
    545         // COMPRESS-117 - improve auto-recognition
    546         if (signatureLength >= TAR_HEADER_SIZE) {
    547             TarArchiveInputStream tais = null;
    548             try {
    549                 tais = new TarArchiveInputStream(new ByteArrayInputStream(tarHeader));
    550                 // COMPRESS-191 - verify the header checksum
    551                 if (tais.getNextTarEntry().isCheckSumOK()) {
    552                     return TAR;
    553                 }
    554             } catch (final Exception e) { // NOPMD // NOSONAR
    555                 // can generate IllegalArgumentException as well
    556                 // as IOException
    557                 // autodetection, simply not a TAR
    558                 // ignored
    559             } finally {
    560                 IOUtils.closeQuietly(tais);
    561             }
    562         }
    563         throw new ArchiveException("No Archiver found for the stream signature");
    564     }
    565 
    566     public SortedMap<String, ArchiveStreamProvider> getArchiveInputStreamProviders() {
    567         if (archiveInputStreamProviders == null) {
    568             archiveInputStreamProviders = Collections
    569                     .unmodifiableSortedMap(findAvailableArchiveInputStreamProviders());
    570         }
    571         return archiveInputStreamProviders;
    572     }
    573 
    574     public SortedMap<String, ArchiveStreamProvider> getArchiveOutputStreamProviders() {
    575         if (archiveOutputStreamProviders == null) {
    576             archiveOutputStreamProviders = Collections
    577                     .unmodifiableSortedMap(findAvailableArchiveOutputStreamProviders());
    578         }
    579         return archiveOutputStreamProviders;
    580     }
    581 
    582     @Override
    583     public Set<String> getInputStreamArchiveNames() {
    584         return Sets.newHashSet(AR, ARJ, ZIP, TAR, JAR, CPIO, DUMP, SEVEN_Z);
    585     }
    586 
    587     @Override
    588     public Set<String> getOutputStreamArchiveNames() {
    589         return Sets.newHashSet(AR, ZIP, TAR, JAR, CPIO, SEVEN_Z);
    590     }
    591 
    592 }
    593