Home | History | Annotate | Download | only in lz
      1 /*
      2  * Hash Chain match finder with 2-, 3-, and 4-byte hashing
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz.lz;
     12 
     13 import org.tukaani.xz.ArrayCache;
     14 
     15 final class HC4 extends LZEncoder {
     16     private final Hash234 hash;
     17     private final int[] chain;
     18     private final Matches matches;
     19     private final int depthLimit;
     20 
     21     private final int cyclicSize;
     22     private int cyclicPos = -1;
     23     private int lzPos;
     24 
     25     /**
     26      * Gets approximate memory usage of the match finder as kibibytes.
     27      */
     28     static int getMemoryUsage(int dictSize) {
     29         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
     30     }
     31 
     32     /**
     33      * Creates a new LZEncoder with the HC4 match finder.
     34      * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     35      */
     36     HC4(int dictSize, int beforeSizeMin, int readAheadMax,
     37             int niceLen, int matchLenMax, int depthLimit,
     38             ArrayCache arrayCache) {
     39         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
     40               arrayCache);
     41 
     42         hash = new Hash234(dictSize, arrayCache);
     43 
     44         // +1 because we need dictSize bytes of history + the current byte.
     45         cyclicSize = dictSize + 1;
     46         chain = arrayCache.getIntArray(cyclicSize, false);
     47         lzPos = cyclicSize;
     48 
     49         // Substracting 1 because the shortest match that this match
     50         // finder can find is 2 bytes, so there's no need to reserve
     51         // space for one-byte matches.
     52         matches = new Matches(niceLen - 1);
     53 
     54         // Use a default depth limit if no other value was specified.
     55         // The default is just something based on experimentation;
     56         // it's nothing magic.
     57         this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
     58     }
     59 
     60     public void putArraysToCache(ArrayCache arrayCache) {
     61         arrayCache.putArray(chain);
     62         hash.putArraysToCache(arrayCache);
     63         super.putArraysToCache(arrayCache);
     64     }
     65 
     66     /**
     67      * Moves to the next byte, checks that there is enough available space,
     68      * and possibly normalizes the hash tables and the hash chain.
     69      *
     70      * @return      number of bytes available, including the current byte
     71      */
     72     private int movePos() {
     73         int avail = movePos(4, 4);
     74 
     75         if (avail != 0) {
     76             if (++lzPos == Integer.MAX_VALUE) {
     77                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
     78                 hash.normalize(normalizationOffset);
     79                 normalize(chain, cyclicSize, normalizationOffset);
     80                 lzPos -= normalizationOffset;
     81             }
     82 
     83             if (++cyclicPos == cyclicSize)
     84                 cyclicPos = 0;
     85         }
     86 
     87         return avail;
     88     }
     89 
     90     public Matches getMatches() {
     91         matches.count = 0;
     92         int matchLenLimit = matchLenMax;
     93         int niceLenLimit = niceLen;
     94         int avail = movePos();
     95 
     96         if (avail < matchLenLimit) {
     97             if (avail == 0)
     98                 return matches;
     99 
    100             matchLenLimit = avail;
    101             if (niceLenLimit > avail)
    102                 niceLenLimit = avail;
    103         }
    104 
    105         hash.calcHashes(buf, readPos);
    106         int delta2 = lzPos - hash.getHash2Pos();
    107         int delta3 = lzPos - hash.getHash3Pos();
    108         int currentMatch = hash.getHash4Pos();
    109         hash.updateTables(lzPos);
    110 
    111         chain[cyclicPos] = currentMatch;
    112 
    113         int lenBest = 0;
    114 
    115         // See if the hash from the first two bytes found a match.
    116         // The hashing algorithm guarantees that if the first byte
    117         // matches, also the second byte does, so there's no need to
    118         // test the second byte.
    119         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
    120             lenBest = 2;
    121             matches.len[0] = 2;
    122             matches.dist[0] = delta2 - 1;
    123             matches.count = 1;
    124         }
    125 
    126         // See if the hash from the first three bytes found a match that
    127         // is different from the match possibly found by the two-byte hash.
    128         // Also here the hashing algorithm guarantees that if the first byte
    129         // matches, also the next two bytes do.
    130         if (delta2 != delta3 && delta3 < cyclicSize
    131                 && buf[readPos - delta3] == buf[readPos]) {
    132             lenBest = 3;
    133             matches.dist[matches.count++] = delta3 - 1;
    134             delta2 = delta3;
    135         }
    136 
    137         // If a match was found, see how long it is.
    138         if (matches.count > 0) {
    139             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
    140                                               == buf[readPos + lenBest])
    141                 ++lenBest;
    142 
    143             matches.len[matches.count - 1] = lenBest;
    144 
    145             // Return if it is long enough (niceLen or reached the end of
    146             // the dictionary).
    147             if (lenBest >= niceLenLimit)
    148                 return matches;
    149         }
    150 
    151         // Long enough match wasn't found so easily. Look for better matches
    152         // from the hash chain.
    153         if (lenBest < 3)
    154             lenBest = 3;
    155 
    156         int depth = depthLimit;
    157 
    158         while (true) {
    159             int delta = lzPos - currentMatch;
    160 
    161             // Return if the search depth limit has been reached or
    162             // if the distance of the potential match exceeds the
    163             // dictionary size.
    164             if (depth-- == 0 || delta >= cyclicSize)
    165                 return matches;
    166 
    167             currentMatch = chain[cyclicPos - delta
    168                                  + (delta > cyclicPos ? cyclicSize : 0)];
    169 
    170             // Test the first byte and the first new byte that would give us
    171             // a match that is at least one byte longer than lenBest. This
    172             // too short matches get quickly skipped.
    173             if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
    174                     && buf[readPos - delta] == buf[readPos]) {
    175                 // Calculate the length of the match.
    176                 int len = 0;
    177                 while (++len < matchLenLimit)
    178                     if (buf[readPos + len - delta] != buf[readPos + len])
    179                         break;
    180 
    181                 // Use the match if and only if it is better than the longest
    182                 // match found so far.
    183                 if (len > lenBest) {
    184                     lenBest = len;
    185                     matches.len[matches.count] = len;
    186                     matches.dist[matches.count] = delta - 1;
    187                     ++matches.count;
    188 
    189                     // Return if it is long enough (niceLen or reached the
    190                     // end of the dictionary).
    191                     if (len >= niceLenLimit)
    192                         return matches;
    193                 }
    194             }
    195         }
    196     }
    197 
    198     public void skip(int len) {
    199         assert len >= 0;
    200 
    201         while (len-- > 0) {
    202             if (movePos() != 0) {
    203                 // Update the hash chain and hash tables.
    204                 hash.calcHashes(buf, readPos);
    205                 chain[cyclicPos] = hash.getHash4Pos();
    206                 hash.updateTables(lzPos);
    207             }
    208         }
    209     }
    210 }
    211