Home | History | Annotate | Download | only in lz
      1 /*
      2  * Hash Chain match finder with 2-, 3-, and 4-byte hashing
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz.lz;
     12 
     13 final class HC4 extends LZEncoder {
     14     private final Hash234 hash;
     15     private final int[] chain;
     16     private final Matches matches;
     17     private final int depthLimit;
     18 
     19     private final int cyclicSize;
     20     private int cyclicPos = -1;
     21     private int lzPos;
     22 
     23     /**
     24      * Gets approximate memory usage of the match finder as kibibytes.
     25      */
     26     static int getMemoryUsage(int dictSize) {
     27         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
     28     }
     29 
     30     /**
     31      * Creates a new LZEncoder with the HC4 match finder.
     32      * See <code>LZEncoder.getInstance</code> for parameter descriptions.
     33      */
     34     HC4(int dictSize, int beforeSizeMin, int readAheadMax,
     35             int niceLen, int matchLenMax, int depthLimit) {
     36         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);
     37 
     38         hash = new Hash234(dictSize);
     39 
     40         // +1 because we need dictSize bytes of history + the current byte.
     41         cyclicSize = dictSize + 1;
     42         chain = new int[cyclicSize];
     43         lzPos = cyclicSize;
     44 
     45         // Substracting 1 because the shortest match that this match
     46         // finder can find is 2 bytes, so there's no need to reserve
     47         // space for one-byte matches.
     48         matches = new Matches(niceLen - 1);
     49 
     50         // Use a default depth limit if no other value was specified.
     51         // The default is just something based on experimentation;
     52         // it's nothing magic.
     53         this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
     54     }
     55 
     56     /**
     57      * Moves to the next byte, checks that there is enough available space,
     58      * and possibly normalizes the hash tables and the hash chain.
     59      *
     60      * @return      number of bytes available, including the current byte
     61      */
     62     private int movePos() {
     63         int avail = movePos(4, 4);
     64 
     65         if (avail != 0) {
     66             if (++lzPos == Integer.MAX_VALUE) {
     67                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
     68                 hash.normalize(normalizationOffset);
     69                 normalize(chain, normalizationOffset);
     70                 lzPos -= normalizationOffset;
     71             }
     72 
     73             if (++cyclicPos == cyclicSize)
     74                 cyclicPos = 0;
     75         }
     76 
     77         return avail;
     78     }
     79 
     80     public Matches getMatches() {
     81         matches.count = 0;
     82         int matchLenLimit = matchLenMax;
     83         int niceLenLimit = niceLen;
     84         int avail = movePos();
     85 
     86         if (avail < matchLenLimit) {
     87             if (avail == 0)
     88                 return matches;
     89 
     90             matchLenLimit = avail;
     91             if (niceLenLimit > avail)
     92                 niceLenLimit = avail;
     93         }
     94 
     95         hash.calcHashes(buf, readPos);
     96         int delta2 = lzPos - hash.getHash2Pos();
     97         int delta3 = lzPos - hash.getHash3Pos();
     98         int currentMatch = hash.getHash4Pos();
     99         hash.updateTables(lzPos);
    100 
    101         chain[cyclicPos] = currentMatch;
    102 
    103         int lenBest = 0;
    104 
    105         // See if the hash from the first two bytes found a match.
    106         // The hashing algorithm guarantees that if the first byte
    107         // matches, also the second byte does, so there's no need to
    108         // test the second byte.
    109         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
    110             lenBest = 2;
    111             matches.len[0] = 2;
    112             matches.dist[0] = delta2 - 1;
    113             matches.count = 1;
    114         }
    115 
    116         // See if the hash from the first three bytes found a match that
    117         // is different from the match possibly found by the two-byte hash.
    118         // Also here the hashing algorithm guarantees that if the first byte
    119         // matches, also the next two bytes do.
    120         if (delta2 != delta3 && delta3 < cyclicSize
    121                 && buf[readPos - delta3] == buf[readPos]) {
    122             lenBest = 3;
    123             matches.dist[matches.count++] = delta3 - 1;
    124             delta2 = delta3;
    125         }
    126 
    127         // If a match was found, see how long it is.
    128         if (matches.count > 0) {
    129             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
    130                                               == buf[readPos + lenBest])
    131                 ++lenBest;
    132 
    133             matches.len[matches.count - 1] = lenBest;
    134 
    135             // Return if it is long enough (niceLen or reached the end of
    136             // the dictionary).
    137             if (lenBest >= niceLenLimit)
    138                 return matches;
    139         }
    140 
    141         // Long enough match wasn't found so easily. Look for better matches
    142         // from the hash chain.
    143         if (lenBest < 3)
    144             lenBest = 3;
    145 
    146         int depth = depthLimit;
    147 
    148         while (true) {
    149             int delta = lzPos - currentMatch;
    150 
    151             // Return if the search depth limit has been reached or
    152             // if the distance of the potential match exceeds the
    153             // dictionary size.
    154             if (depth-- == 0 || delta >= cyclicSize)
    155                 return matches;
    156 
    157             currentMatch = chain[cyclicPos - delta
    158                                  + (delta > cyclicPos ? cyclicSize : 0)];
    159 
    160             // Test the first byte and the first new byte that would give us
    161             // a match that is at least one byte longer than lenBest. This
    162             // too short matches get quickly skipped.
    163             if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
    164                     && buf[readPos - delta] == buf[readPos]) {
    165                 // Calculate the length of the match.
    166                 int len = 0;
    167                 while (++len < matchLenLimit)
    168                     if (buf[readPos + len - delta] != buf[readPos + len])
    169                         break;
    170 
    171                 // Use the match if and only if it is better than the longest
    172                 // match found so far.
    173                 if (len > lenBest) {
    174                     lenBest = len;
    175                     matches.len[matches.count] = len;
    176                     matches.dist[matches.count] = delta - 1;
    177                     ++matches.count;
    178 
    179                     // Return if it is long enough (niceLen or reached the
    180                     // end of the dictionary).
    181                     if (len >= niceLenLimit)
    182                         return matches;
    183                 }
    184             }
    185         }
    186     }
    187 
    188     public void skip(int len) {
    189         assert len >= 0;
    190 
    191         while (len-- > 0) {
    192             if (movePos() != 0) {
    193                 // Update the hash chain and hash tables.
    194                 hash.calcHashes(buf, readPos);
    195                 chain[cyclicPos] = hash.getHash4Pos();
    196                 hash.updateTables(lzPos);
    197             }
    198         }
    199     }
    200 }
    201