1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.commons.compress.compressors.z; 20 21 import java.io.IOException; 22 import java.io.InputStream; 23 import java.nio.ByteOrder; 24 25 import org.apache.commons.compress.compressors.lzw.LZWInputStream; 26 27 /** 28 * Input stream that decompresses .Z files. 29 * @NotThreadSafe 30 * @since 1.7 31 */ 32 public class ZCompressorInputStream extends LZWInputStream { 33 private static final int MAGIC_1 = 0x1f; 34 private static final int MAGIC_2 = 0x9d; 35 private static final int BLOCK_MODE_MASK = 0x80; 36 private static final int MAX_CODE_SIZE_MASK = 0x1f; 37 private final boolean blockMode; 38 private final int maxCodeSize; 39 private long totalCodesRead = 0; 40 41 public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) 42 throws IOException { 43 super(inputStream, ByteOrder.LITTLE_ENDIAN); 44 final int firstByte = (int) in.readBits(8); 45 final int secondByte = (int) in.readBits(8); 46 final int thirdByte = (int) in.readBits(8); 47 if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { 48 throw new IOException("Input is not in .Z format"); 49 } 50 blockMode = (thirdByte & BLOCK_MODE_MASK) != 0; 51 maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK; 52 if (blockMode) { 53 setClearCode(DEFAULT_CODE_SIZE); 54 } 55 initializeTables(maxCodeSize, memoryLimitInKb); 56 clearEntries(); 57 } 58 59 public ZCompressorInputStream(final InputStream inputStream) throws IOException { 60 this(inputStream, -1); 61 } 62 63 private void clearEntries() { 64 setTableSize((1 << 8) + (blockMode ? 1 : 0)); 65 } 66 67 /** 68 * {@inheritDoc} 69 * <p><strong>This method is only protected for technical reasons 70 * and is not part of Commons Compress' published API. It may 71 * change or disappear without warning.</strong></p> 72 */ 73 @Override 74 protected int readNextCode() throws IOException { 75 final int code = super.readNextCode(); 76 if (code >= 0) { 77 ++totalCodesRead; 78 } 79 return code; 80 } 81 82 private void reAlignReading() throws IOException { 83 // "compress" works in multiples of 8 symbols, each codeBits bits long. 84 // When codeBits changes, the remaining unused symbols in the current 85 // group of 8 are still written out, in the old codeSize, 86 // as garbage values (usually zeroes) that need to be skipped. 87 long codeReadsToThrowAway = 8 - (totalCodesRead % 8); 88 if (codeReadsToThrowAway == 8) { 89 codeReadsToThrowAway = 0; 90 } 91 for (long i = 0; i < codeReadsToThrowAway; i++) { 92 readNextCode(); 93 } 94 in.clearBitCache(); 95 } 96 97 /** 98 * {@inheritDoc} 99 * <p><strong>This method is only protected for technical reasons 100 * and is not part of Commons Compress' published API. It may 101 * change or disappear without warning.</strong></p> 102 */ 103 @Override 104 protected int addEntry(final int previousCode, final byte character) throws IOException { 105 final int maxTableSize = 1 << getCodeSize(); 106 final int r = addEntry(previousCode, character, maxTableSize); 107 if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) { 108 reAlignReading(); 109 incrementCodeSize(); 110 } 111 return r; 112 } 113 114 /** 115 * {@inheritDoc} 116 * <p><strong>This method is only protected for technical reasons 117 * and is not part of Commons Compress' published API. It may 118 * change or disappear without warning.</strong></p> 119 */ 120 @Override 121 protected int decompressNextSymbol() throws IOException { 122 // 123 // table entry table entry 124 // _____________ _____ 125 // table entry / \ / \ 126 // ____________/ \ \ 127 // / / \ / \ \ 128 // +---+---+---+---+---+---+---+---+---+---+ 129 // | . | . | . | . | . | . | . | . | . | . | 130 // +---+---+---+---+---+---+---+---+---+---+ 131 // |<--------->|<------------->|<----->|<->| 132 // symbol symbol symbol symbol 133 // 134 final int code = readNextCode(); 135 if (code < 0) { 136 return -1; 137 } else if (blockMode && code == getClearCode()) { 138 clearEntries(); 139 reAlignReading(); 140 resetCodeSize(); 141 resetPreviousCode(); 142 return 0; 143 } else { 144 boolean addedUnfinishedEntry = false; 145 if (code == getTableSize()) { 146 addRepeatOfPreviousCode(); 147 addedUnfinishedEntry = true; 148 } else if (code > getTableSize()) { 149 throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code)); 150 } 151 return expandCodeToOutputStack(code, addedUnfinishedEntry); 152 } 153 } 154 155 /** 156 * Checks if the signature matches what is expected for a Unix compress file. 157 * 158 * @param signature 159 * the bytes to check 160 * @param length 161 * the number of bytes to check 162 * @return true, if this stream is a Unix compress compressed 163 * stream, false otherwise 164 * 165 * @since 1.9 166 */ 167 public static boolean matches(final byte[] signature, final int length) { 168 return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2; 169 } 170 171 } 172