1 /* 2 * Hash Chain match finder with 2-, 3-, and 4-byte hashing 3 * 4 * Authors: Lasse Collin <lasse.collin (at) tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 import org.tukaani.xz.ArrayCache; 14 15 final class HC4 extends LZEncoder { 16 private final Hash234 hash; 17 private final int[] chain; 18 private final Matches matches; 19 private final int depthLimit; 20 21 private final int cyclicSize; 22 private int cyclicPos = -1; 23 private int lzPos; 24 25 /** 26 * Gets approximate memory usage of the match finder as kibibytes. 27 */ 28 static int getMemoryUsage(int dictSize) { 29 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10; 30 } 31 32 /** 33 * Creates a new LZEncoder with the HC4 match finder. 34 * See <code>LZEncoder.getInstance</code> for parameter descriptions. 35 */ 36 HC4(int dictSize, int beforeSizeMin, int readAheadMax, 37 int niceLen, int matchLenMax, int depthLimit, 38 ArrayCache arrayCache) { 39 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax, 40 arrayCache); 41 42 hash = new Hash234(dictSize, arrayCache); 43 44 // +1 because we need dictSize bytes of history + the current byte. 45 cyclicSize = dictSize + 1; 46 chain = arrayCache.getIntArray(cyclicSize, false); 47 lzPos = cyclicSize; 48 49 // Substracting 1 because the shortest match that this match 50 // finder can find is 2 bytes, so there's no need to reserve 51 // space for one-byte matches. 52 matches = new Matches(niceLen - 1); 53 54 // Use a default depth limit if no other value was specified. 55 // The default is just something based on experimentation; 56 // it's nothing magic. 57 this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4; 58 } 59 60 public void putArraysToCache(ArrayCache arrayCache) { 61 arrayCache.putArray(chain); 62 hash.putArraysToCache(arrayCache); 63 super.putArraysToCache(arrayCache); 64 } 65 66 /** 67 * Moves to the next byte, checks that there is enough available space, 68 * and possibly normalizes the hash tables and the hash chain. 69 * 70 * @return number of bytes available, including the current byte 71 */ 72 private int movePos() { 73 int avail = movePos(4, 4); 74 75 if (avail != 0) { 76 if (++lzPos == Integer.MAX_VALUE) { 77 int normalizationOffset = Integer.MAX_VALUE - cyclicSize; 78 hash.normalize(normalizationOffset); 79 normalize(chain, cyclicSize, normalizationOffset); 80 lzPos -= normalizationOffset; 81 } 82 83 if (++cyclicPos == cyclicSize) 84 cyclicPos = 0; 85 } 86 87 return avail; 88 } 89 90 public Matches getMatches() { 91 matches.count = 0; 92 int matchLenLimit = matchLenMax; 93 int niceLenLimit = niceLen; 94 int avail = movePos(); 95 96 if (avail < matchLenLimit) { 97 if (avail == 0) 98 return matches; 99 100 matchLenLimit = avail; 101 if (niceLenLimit > avail) 102 niceLenLimit = avail; 103 } 104 105 hash.calcHashes(buf, readPos); 106 int delta2 = lzPos - hash.getHash2Pos(); 107 int delta3 = lzPos - hash.getHash3Pos(); 108 int currentMatch = hash.getHash4Pos(); 109 hash.updateTables(lzPos); 110 111 chain[cyclicPos] = currentMatch; 112 113 int lenBest = 0; 114 115 // See if the hash from the first two bytes found a match. 116 // The hashing algorithm guarantees that if the first byte 117 // matches, also the second byte does, so there's no need to 118 // test the second byte. 119 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { 120 lenBest = 2; 121 matches.len[0] = 2; 122 matches.dist[0] = delta2 - 1; 123 matches.count = 1; 124 } 125 126 // See if the hash from the first three bytes found a match that 127 // is different from the match possibly found by the two-byte hash. 128 // Also here the hashing algorithm guarantees that if the first byte 129 // matches, also the next two bytes do. 130 if (delta2 != delta3 && delta3 < cyclicSize 131 && buf[readPos - delta3] == buf[readPos]) { 132 lenBest = 3; 133 matches.dist[matches.count++] = delta3 - 1; 134 delta2 = delta3; 135 } 136 137 // If a match was found, see how long it is. 138 if (matches.count > 0) { 139 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] 140 == buf[readPos + lenBest]) 141 ++lenBest; 142 143 matches.len[matches.count - 1] = lenBest; 144 145 // Return if it is long enough (niceLen or reached the end of 146 // the dictionary). 147 if (lenBest >= niceLenLimit) 148 return matches; 149 } 150 151 // Long enough match wasn't found so easily. Look for better matches 152 // from the hash chain. 153 if (lenBest < 3) 154 lenBest = 3; 155 156 int depth = depthLimit; 157 158 while (true) { 159 int delta = lzPos - currentMatch; 160 161 // Return if the search depth limit has been reached or 162 // if the distance of the potential match exceeds the 163 // dictionary size. 164 if (depth-- == 0 || delta >= cyclicSize) 165 return matches; 166 167 currentMatch = chain[cyclicPos - delta 168 + (delta > cyclicPos ? cyclicSize : 0)]; 169 170 // Test the first byte and the first new byte that would give us 171 // a match that is at least one byte longer than lenBest. This 172 // too short matches get quickly skipped. 173 if (buf[readPos + lenBest - delta] == buf[readPos + lenBest] 174 && buf[readPos - delta] == buf[readPos]) { 175 // Calculate the length of the match. 176 int len = 0; 177 while (++len < matchLenLimit) 178 if (buf[readPos + len - delta] != buf[readPos + len]) 179 break; 180 181 // Use the match if and only if it is better than the longest 182 // match found so far. 183 if (len > lenBest) { 184 lenBest = len; 185 matches.len[matches.count] = len; 186 matches.dist[matches.count] = delta - 1; 187 ++matches.count; 188 189 // Return if it is long enough (niceLen or reached the 190 // end of the dictionary). 191 if (len >= niceLenLimit) 192 return matches; 193 } 194 } 195 } 196 } 197 198 public void skip(int len) { 199 assert len >= 0; 200 201 while (len-- > 0) { 202 if (movePos() != 0) { 203 // Update the hash chain and hash tables. 204 hash.calcHashes(buf, readPos); 205 chain[cyclicPos] = hash.getHash4Pos(); 206 hash.updateTables(lzPos); 207 } 208 } 209 } 210 } 211