Home | History | Annotate | Download | only in zip
      1 /*
      2  *  Licensed to the Apache Software Foundation (ASF) under one or more
      3  *  contributor license agreements.  See the NOTICE file distributed with
      4  *  this work for additional information regarding copyright ownership.
      5  *  The ASF licenses this file to You under the Apache License, Version 2.0
      6  *  (the "License"); you may not use this file except in compliance with
      7  *  the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  *  Unless required by applicable law or agreed to in writing, software
     12  *  distributed under the License is distributed on an "AS IS" BASIS,
     13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  *  See the License for the specific language governing permissions and
     15  *  limitations under the License.
     16  *
     17  */
     18 
     19 package org.apache.commons.compress.archivers.zip;
     20 
     21 import static org.junit.Assert.*;
     22 
     23 import java.io.File;
     24 import java.io.FileInputStream;
     25 import java.io.IOException;
     26 import java.io.InputStream;
     27 import java.io.UnsupportedEncodingException;
     28 import java.nio.ByteBuffer;
     29 import java.util.Enumeration;
     30 import java.util.zip.CRC32;
     31 
     32 import org.apache.commons.compress.AbstractTestCase;
     33 import org.apache.commons.compress.utils.CharsetNames;
     34 import org.junit.Test;
     35 
     36 public class UTF8ZipFilesTest extends AbstractTestCase {
     37 
     38     private static final String CP437 = "cp437";
     39     private static final String ASCII_TXT = "ascii.txt";
     40     private static final String EURO_FOR_DOLLAR_TXT = "\u20AC_for_Dollar.txt";
     41     private static final String OIL_BARREL_TXT = "\u00D6lf\u00E4sser.txt";
     42 
     43     @Test
     44     public void testUtf8FileRoundtripExplicitUnicodeExtra()
     45         throws IOException {
     46         testFileRoundtrip(CharsetNames.UTF_8, true, true);
     47     }
     48 
     49     @Test
     50     public void testUtf8FileRoundtripNoEFSExplicitUnicodeExtra()
     51         throws IOException {
     52         testFileRoundtrip(CharsetNames.UTF_8, false, true);
     53     }
     54 
     55     @Test
     56     public void testCP437FileRoundtripExplicitUnicodeExtra()
     57         throws IOException {
     58         testFileRoundtrip(CP437, false, true);
     59     }
     60 
     61     @Test
     62     public void testASCIIFileRoundtripExplicitUnicodeExtra()
     63         throws IOException {
     64         testFileRoundtrip(CharsetNames.US_ASCII, false, true);
     65     }
     66 
     67     @Test
     68     public void testUtf8FileRoundtripImplicitUnicodeExtra()
     69         throws IOException {
     70         testFileRoundtrip(CharsetNames.UTF_8, true, false);
     71     }
     72 
     73     @Test
     74     public void testUtf8FileRoundtripNoEFSImplicitUnicodeExtra()
     75         throws IOException {
     76         testFileRoundtrip(CharsetNames.UTF_8, false, false);
     77     }
     78 
     79     @Test
     80     public void testCP437FileRoundtripImplicitUnicodeExtra()
     81         throws IOException {
     82         testFileRoundtrip(CP437, false, false);
     83     }
     84 
     85     @Test
     86     public void testASCIIFileRoundtripImplicitUnicodeExtra()
     87         throws IOException {
     88         testFileRoundtrip(CharsetNames.US_ASCII, false, false);
     89     }
     90 
     91     /*
     92      * 7-ZIP created archive, uses EFS to signal UTF-8 filenames.
     93      *
     94      * 7-ZIP doesn't use EFS for strings that can be encoded in CP437
     95      * - which is true for OIL_BARREL_TXT.
     96      */
     97     @Test
     98     public void testRead7ZipArchive() throws IOException {
     99         final File archive = getFile("utf8-7zip-test.zip");
    100         ZipFile zf = null;
    101         try {
    102             zf = new ZipFile(archive, CP437, false);
    103             assertNotNull(zf.getEntry(ASCII_TXT));
    104             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
    105             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
    106         } finally {
    107             ZipFile.closeQuietly(zf);
    108         }
    109     }
    110 
    111     @Test
    112     public void testRead7ZipArchiveForStream() throws IOException {
    113         final FileInputStream archive =
    114             new FileInputStream(getFile("utf8-7zip-test.zip"));
    115         ZipArchiveInputStream zi = null;
    116         try {
    117             zi = new ZipArchiveInputStream(archive, CP437, false);
    118             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
    119             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
    120             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
    121         } finally {
    122             if (zi != null) {
    123                 zi.close();
    124             }
    125         }
    126     }
    127 
    128     /*
    129      * WinZIP created archive, uses Unicode Extra Fields but only in
    130      * the central directory.
    131      */
    132     @Test
    133     public void testReadWinZipArchive() throws IOException {
    134         final File archive = getFile("utf8-winzip-test.zip");
    135         ZipFile zf = null;
    136         try {
    137             zf = new ZipFile(archive, null, true);
    138             assertCanRead(zf, ASCII_TXT);
    139             assertCanRead(zf, EURO_FOR_DOLLAR_TXT);
    140             assertCanRead(zf, OIL_BARREL_TXT);
    141         } finally {
    142             ZipFile.closeQuietly(zf);
    143         }
    144     }
    145 
    146     private void assertCanRead(final ZipFile zf, final String fileName) throws IOException {
    147         final ZipArchiveEntry entry = zf.getEntry(fileName);
    148         assertNotNull("Entry doesn't exist", entry);
    149         final InputStream is = zf.getInputStream(entry);
    150         assertNotNull("InputStream is null", is);
    151         try {
    152             is.read();
    153         } finally {
    154             is.close();
    155         }
    156     }
    157 
    158     @Test
    159     public void testReadWinZipArchiveForStream() throws IOException {
    160         final FileInputStream archive =
    161             new FileInputStream(getFile("utf8-winzip-test.zip"));
    162         ZipArchiveInputStream zi = null;
    163         try {
    164             zi = new ZipArchiveInputStream(archive, null, true);
    165             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
    166             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
    167             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
    168         } finally {
    169             if (zi != null) {
    170                 zi.close();
    171             }
    172         }
    173     }
    174 
    175     @Test
    176     public void testZipFileReadsUnicodeFields() throws IOException {
    177         final File file = File.createTempFile("unicode-test", ".zip");
    178         file.deleteOnExit();
    179         ZipArchiveInputStream zi = null;
    180         try {
    181             createTestFile(file, CharsetNames.US_ASCII, false, true);
    182             final FileInputStream archive = new FileInputStream(file);
    183             zi = new ZipArchiveInputStream(archive, CharsetNames.US_ASCII, true);
    184             assertEquals(OIL_BARREL_TXT, zi.getNextEntry().getName());
    185             assertEquals(EURO_FOR_DOLLAR_TXT, zi.getNextEntry().getName());
    186             assertEquals(ASCII_TXT, zi.getNextEntry().getName());
    187         } finally {
    188             if (zi != null) {
    189                 zi.close();
    190             }
    191             tryHardToDelete(file);
    192         }
    193     }
    194 
    195     @Test
    196     public void testZipArchiveInputStreamReadsUnicodeFields()
    197         throws IOException {
    198         final File file = File.createTempFile("unicode-test", ".zip");
    199         file.deleteOnExit();
    200         ZipFile zf = null;
    201         try {
    202             createTestFile(file, CharsetNames.US_ASCII, false, true);
    203             zf = new ZipFile(file, CharsetNames.US_ASCII, true);
    204             assertNotNull(zf.getEntry(ASCII_TXT));
    205             assertNotNull(zf.getEntry(EURO_FOR_DOLLAR_TXT));
    206             assertNotNull(zf.getEntry(OIL_BARREL_TXT));
    207         } finally {
    208             ZipFile.closeQuietly(zf);
    209             tryHardToDelete(file);
    210         }
    211     }
    212 
    213     @Test
    214     public void testRawNameReadFromZipFile()
    215         throws IOException {
    216         final File archive = getFile("utf8-7zip-test.zip");
    217         ZipFile zf = null;
    218         try {
    219             zf = new ZipFile(archive, CP437, false);
    220             assertRawNameOfAcsiiTxt(zf.getEntry(ASCII_TXT));
    221         } finally {
    222             ZipFile.closeQuietly(zf);
    223         }
    224     }
    225 
    226     @Test
    227     public void testRawNameReadFromStream()
    228         throws IOException {
    229         final FileInputStream archive =
    230             new FileInputStream(getFile("utf8-7zip-test.zip"));
    231         ZipArchiveInputStream zi = null;
    232         try {
    233             zi = new ZipArchiveInputStream(archive, CP437, false);
    234             assertRawNameOfAcsiiTxt((ZipArchiveEntry) zi.getNextEntry());
    235         } finally {
    236             if (zi != null) {
    237                 zi.close();
    238             }
    239         }
    240     }
    241 
    242     private static void testFileRoundtrip(final String encoding, final boolean withEFS,
    243                                           final boolean withExplicitUnicodeExtra)
    244         throws IOException {
    245 
    246         final File file = File.createTempFile(encoding + "-test", ".zip");
    247         file.deleteOnExit();
    248         try {
    249             createTestFile(file, encoding, withEFS, withExplicitUnicodeExtra);
    250             testFile(file, encoding);
    251         } finally {
    252             tryHardToDelete(file);
    253         }
    254     }
    255 
    256     private static void createTestFile(final File file, final String encoding,
    257                                        final boolean withEFS,
    258                                        final boolean withExplicitUnicodeExtra)
    259         throws UnsupportedEncodingException, IOException {
    260 
    261         final ZipEncoding zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
    262 
    263         ZipArchiveOutputStream zos = null;
    264         try {
    265             zos = new ZipArchiveOutputStream(file);
    266             zos.setEncoding(encoding);
    267             zos.setUseLanguageEncodingFlag(withEFS);
    268             zos.setCreateUnicodeExtraFields(withExplicitUnicodeExtra ?
    269                                             ZipArchiveOutputStream
    270                                             .UnicodeExtraFieldPolicy.NEVER
    271                                             : ZipArchiveOutputStream
    272                                             .UnicodeExtraFieldPolicy.ALWAYS);
    273 
    274             ZipArchiveEntry ze = new ZipArchiveEntry(OIL_BARREL_TXT);
    275             if (withExplicitUnicodeExtra
    276                 && !zipEncoding.canEncode(ze.getName())) {
    277 
    278                 final ByteBuffer en = zipEncoding.encode(ze.getName());
    279 
    280                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
    281                                                            en.array(),
    282                                                            en.arrayOffset(),
    283                                                            en.limit()
    284                                                            - en.position()));
    285             }
    286 
    287             zos.putArchiveEntry(ze);
    288             zos.write("Hello, world!".getBytes(CharsetNames.US_ASCII));
    289             zos.closeArchiveEntry();
    290 
    291             ze = new ZipArchiveEntry(EURO_FOR_DOLLAR_TXT);
    292             if (withExplicitUnicodeExtra
    293                 && !zipEncoding.canEncode(ze.getName())) {
    294 
    295                 final ByteBuffer en = zipEncoding.encode(ze.getName());
    296 
    297                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
    298                                                            en.array(),
    299                                                            en.arrayOffset(),
    300                                                            en.limit()
    301                                                            - en.position()));
    302             }
    303 
    304             zos.putArchiveEntry(ze);
    305             zos.write("Give me your money!".getBytes(CharsetNames.US_ASCII));
    306             zos.closeArchiveEntry();
    307 
    308             ze = new ZipArchiveEntry(ASCII_TXT);
    309 
    310             if (withExplicitUnicodeExtra
    311                 && !zipEncoding.canEncode(ze.getName())) {
    312 
    313                 final ByteBuffer en = zipEncoding.encode(ze.getName());
    314 
    315                 ze.addExtraField(new UnicodePathExtraField(ze.getName(),
    316                                                            en.array(),
    317                                                            en.arrayOffset(),
    318                                                            en.limit()
    319                                                            - en.position()));
    320             }
    321 
    322             zos.putArchiveEntry(ze);
    323             zos.write("ascii".getBytes(CharsetNames.US_ASCII));
    324             zos.closeArchiveEntry();
    325 
    326             zos.finish();
    327         } finally {
    328             if (zos != null) {
    329                 try {
    330                     zos.close();
    331                 } catch (final IOException e) { /* swallow */ }
    332             }
    333         }
    334     }
    335 
    336     private static void testFile(final File file, final String encoding)
    337         throws IOException {
    338         ZipFile zf = null;
    339         try {
    340             zf = new ZipFile(file, encoding, false);
    341 
    342             final Enumeration<ZipArchiveEntry> e = zf.getEntries();
    343             while (e.hasMoreElements()) {
    344                 final ZipArchiveEntry ze = e.nextElement();
    345 
    346                 if (ze.getName().endsWith("sser.txt")) {
    347                     assertUnicodeName(ze, OIL_BARREL_TXT, encoding);
    348 
    349                 } else if (ze.getName().endsWith("_for_Dollar.txt")) {
    350                     assertUnicodeName(ze, EURO_FOR_DOLLAR_TXT, encoding);
    351                 } else if (!ze.getName().equals(ASCII_TXT)) {
    352                     throw new AssertionError("Unrecognized ZIP entry with name ["
    353                                              + ze.getName() + "] found.");
    354                 }
    355             }
    356         } finally {
    357             ZipFile.closeQuietly(zf);
    358         }
    359     }
    360 
    361     private static UnicodePathExtraField findUniCodePath(final ZipArchiveEntry ze) {
    362         return (UnicodePathExtraField)
    363             ze.getExtraField(UnicodePathExtraField.UPATH_ID);
    364     }
    365 
    366     private static void assertUnicodeName(final ZipArchiveEntry ze,
    367                                           final String expectedName,
    368                                           final String encoding)
    369         throws IOException {
    370         if (!expectedName.equals(ze.getName())) {
    371             final UnicodePathExtraField ucpf = findUniCodePath(ze);
    372             assertNotNull(ucpf);
    373 
    374             final ZipEncoding enc = ZipEncodingHelper.getZipEncoding(encoding);
    375             final ByteBuffer ne = enc.encode(ze.getName());
    376 
    377             final CRC32 crc = new CRC32();
    378             crc.update(ne.array(), ne.arrayOffset(),
    379                        ne.limit() - ne.position());
    380 
    381             assertEquals(crc.getValue(), ucpf.getNameCRC32());
    382             assertEquals(expectedName, new String(ucpf.getUnicodeName(),
    383                                                   CharsetNames.UTF_8));
    384         }
    385     }
    386 
    387     @Test
    388     public void testUtf8Interoperability() throws IOException {
    389         final File file1 = getFile("utf8-7zip-test.zip");
    390         final File file2 = getFile("utf8-winzip-test.zip");
    391 
    392         testFile(file1,CP437);
    393         testFile(file2,CP437);
    394 
    395     }
    396 
    397     private static void assertRawNameOfAcsiiTxt(final ZipArchiveEntry ze) {
    398         final byte[] b = ze.getRawName();
    399         assertNotNull(b);
    400         final int len = ASCII_TXT.length();
    401         assertEquals(len, b.length);
    402         for (int i = 0; i < len; i++) {
    403             assertEquals("Byte " + i, (byte) ASCII_TXT.charAt(i), b[i]);
    404         }
    405         assertNotSame(b, ze.getRawName());
    406     }
    407 }
    408 
    409