1 /* 2 * Hash Chain match finder with 2-, 3-, and 4-byte hashing 3 * 4 * Authors: Lasse Collin <lasse.collin (at) tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 final class HC4 extends LZEncoder { 14 private final Hash234 hash; 15 private final int[] chain; 16 private final Matches matches; 17 private final int depthLimit; 18 19 private final int cyclicSize; 20 private int cyclicPos = -1; 21 private int lzPos; 22 23 /** 24 * Gets approximate memory usage of the match finder as kibibytes. 25 */ 26 static int getMemoryUsage(int dictSize) { 27 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10; 28 } 29 30 /** 31 * Creates a new LZEncoder with the HC4 match finder. 32 * See <code>LZEncoder.getInstance</code> for parameter descriptions. 33 */ 34 HC4(int dictSize, int beforeSizeMin, int readAheadMax, 35 int niceLen, int matchLenMax, int depthLimit) { 36 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax); 37 38 hash = new Hash234(dictSize); 39 40 // +1 because we need dictSize bytes of history + the current byte. 41 cyclicSize = dictSize + 1; 42 chain = new int[cyclicSize]; 43 lzPos = cyclicSize; 44 45 // Substracting 1 because the shortest match that this match 46 // finder can find is 2 bytes, so there's no need to reserve 47 // space for one-byte matches. 48 matches = new Matches(niceLen - 1); 49 50 // Use a default depth limit if no other value was specified. 51 // The default is just something based on experimentation; 52 // it's nothing magic. 53 this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4; 54 } 55 56 /** 57 * Moves to the next byte, checks that there is enough available space, 58 * and possibly normalizes the hash tables and the hash chain. 59 * 60 * @return number of bytes available, including the current byte 61 */ 62 private int movePos() { 63 int avail = movePos(4, 4); 64 65 if (avail != 0) { 66 if (++lzPos == Integer.MAX_VALUE) { 67 int normalizationOffset = Integer.MAX_VALUE - cyclicSize; 68 hash.normalize(normalizationOffset); 69 normalize(chain, normalizationOffset); 70 lzPos -= normalizationOffset; 71 } 72 73 if (++cyclicPos == cyclicSize) 74 cyclicPos = 0; 75 } 76 77 return avail; 78 } 79 80 public Matches getMatches() { 81 matches.count = 0; 82 int matchLenLimit = matchLenMax; 83 int niceLenLimit = niceLen; 84 int avail = movePos(); 85 86 if (avail < matchLenLimit) { 87 if (avail == 0) 88 return matches; 89 90 matchLenLimit = avail; 91 if (niceLenLimit > avail) 92 niceLenLimit = avail; 93 } 94 95 hash.calcHashes(buf, readPos); 96 int delta2 = lzPos - hash.getHash2Pos(); 97 int delta3 = lzPos - hash.getHash3Pos(); 98 int currentMatch = hash.getHash4Pos(); 99 hash.updateTables(lzPos); 100 101 chain[cyclicPos] = currentMatch; 102 103 int lenBest = 0; 104 105 // See if the hash from the first two bytes found a match. 106 // The hashing algorithm guarantees that if the first byte 107 // matches, also the second byte does, so there's no need to 108 // test the second byte. 109 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { 110 lenBest = 2; 111 matches.len[0] = 2; 112 matches.dist[0] = delta2 - 1; 113 matches.count = 1; 114 } 115 116 // See if the hash from the first three bytes found a match that 117 // is different from the match possibly found by the two-byte hash. 118 // Also here the hashing algorithm guarantees that if the first byte 119 // matches, also the next two bytes do. 120 if (delta2 != delta3 && delta3 < cyclicSize 121 && buf[readPos - delta3] == buf[readPos]) { 122 lenBest = 3; 123 matches.dist[matches.count++] = delta3 - 1; 124 delta2 = delta3; 125 } 126 127 // If a match was found, see how long it is. 128 if (matches.count > 0) { 129 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] 130 == buf[readPos + lenBest]) 131 ++lenBest; 132 133 matches.len[matches.count - 1] = lenBest; 134 135 // Return if it is long enough (niceLen or reached the end of 136 // the dictionary). 137 if (lenBest >= niceLenLimit) 138 return matches; 139 } 140 141 // Long enough match wasn't found so easily. Look for better matches 142 // from the hash chain. 143 if (lenBest < 3) 144 lenBest = 3; 145 146 int depth = depthLimit; 147 148 while (true) { 149 int delta = lzPos - currentMatch; 150 151 // Return if the search depth limit has been reached or 152 // if the distance of the potential match exceeds the 153 // dictionary size. 154 if (depth-- == 0 || delta >= cyclicSize) 155 return matches; 156 157 currentMatch = chain[cyclicPos - delta 158 + (delta > cyclicPos ? cyclicSize : 0)]; 159 160 // Test the first byte and the first new byte that would give us 161 // a match that is at least one byte longer than lenBest. This 162 // too short matches get quickly skipped. 163 if (buf[readPos + lenBest - delta] == buf[readPos + lenBest] 164 && buf[readPos - delta] == buf[readPos]) { 165 // Calculate the length of the match. 166 int len = 0; 167 while (++len < matchLenLimit) 168 if (buf[readPos + len - delta] != buf[readPos + len]) 169 break; 170 171 // Use the match if and only if it is better than the longest 172 // match found so far. 173 if (len > lenBest) { 174 lenBest = len; 175 matches.len[matches.count] = len; 176 matches.dist[matches.count] = delta - 1; 177 ++matches.count; 178 179 // Return if it is long enough (niceLen or reached the 180 // end of the dictionary). 181 if (len >= niceLenLimit) 182 return matches; 183 } 184 } 185 } 186 } 187 188 public void skip(int len) { 189 assert len >= 0; 190 191 while (len-- > 0) { 192 if (movePos() != 0) { 193 // Update the hash chain and hash tables. 194 hash.calcHashes(buf, readPos); 195 chain[cyclicPos] = hash.getHash4Pos(); 196 hash.updateTables(lzPos); 197 } 198 } 199 } 200 } 201