Home | History | Annotate | Download | only in lz
      1 /*
      2  * Binary Tree match finder with 2-, 3-, and 4-byte hashing
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz.lz;
     12 
     13 final class BT4 extends LZEncoder {
     14     private final Hash234 hash;
     15     private final int[] tree;
     16     private final Matches matches;
     17     private final int depthLimit;
     18 
     19     private final int cyclicSize;
     20     private int cyclicPos = -1;
     21     private int lzPos;
     22 
     23     static int getMemoryUsage(int dictSize) {
     24         return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
     25     }
     26 
     27     BT4(int dictSize, int beforeSizeMin, int readAheadMax,
     28             int niceLen, int matchLenMax, int depthLimit) {
     29         super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);
     30 
     31         cyclicSize = dictSize + 1;
     32         lzPos = cyclicSize;
     33 
     34         hash = new Hash234(dictSize);
     35         tree = new int[cyclicSize * 2];
     36 
     37         // Substracting 1 because the shortest match that this match
     38         // finder can find is 2 bytes, so there's no need to reserve
     39         // space for one-byte matches.
     40         matches = new Matches(niceLen - 1);
     41 
     42         this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
     43     }
     44 
     45     private int movePos() {
     46         int avail = movePos(niceLen, 4);
     47 
     48         if (avail != 0) {
     49             if (++lzPos == Integer.MAX_VALUE) {
     50                 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
     51                 hash.normalize(normalizationOffset);
     52                 normalize(tree, normalizationOffset);
     53                 lzPos -= normalizationOffset;
     54             }
     55 
     56             if (++cyclicPos == cyclicSize)
     57                 cyclicPos = 0;
     58         }
     59 
     60         return avail;
     61     }
     62 
     63     public Matches getMatches() {
     64         matches.count = 0;
     65 
     66         int matchLenLimit = matchLenMax;
     67         int niceLenLimit = niceLen;
     68         int avail = movePos();
     69 
     70         if (avail < matchLenLimit) {
     71             if (avail == 0)
     72                 return matches;
     73 
     74             matchLenLimit = avail;
     75             if (niceLenLimit > avail)
     76                 niceLenLimit = avail;
     77         }
     78 
     79         hash.calcHashes(buf, readPos);
     80         int delta2 = lzPos - hash.getHash2Pos();
     81         int delta3 = lzPos - hash.getHash3Pos();
     82         int currentMatch = hash.getHash4Pos();
     83         hash.updateTables(lzPos);
     84 
     85         int lenBest = 0;
     86 
     87         // See if the hash from the first two bytes found a match.
     88         // The hashing algorithm guarantees that if the first byte
     89         // matches, also the second byte does, so there's no need to
     90         // test the second byte.
     91         if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
     92             lenBest = 2;
     93             matches.len[0] = 2;
     94             matches.dist[0] = delta2 - 1;
     95             matches.count = 1;
     96         }
     97 
     98         // See if the hash from the first three bytes found a match that
     99         // is different from the match possibly found by the two-byte hash.
    100         // Also here the hashing algorithm guarantees that if the first byte
    101         // matches, also the next two bytes do.
    102         if (delta2 != delta3 && delta3 < cyclicSize
    103                 && buf[readPos - delta3] == buf[readPos]) {
    104             lenBest = 3;
    105             matches.dist[matches.count++] = delta3 - 1;
    106             delta2 = delta3;
    107         }
    108 
    109         // If a match was found, see how long it is.
    110         if (matches.count > 0) {
    111             while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
    112                                               == buf[readPos + lenBest])
    113                 ++lenBest;
    114 
    115             matches.len[matches.count - 1] = lenBest;
    116 
    117             // Return if it is long enough (niceLen or reached the end of
    118             // the dictionary).
    119             if (lenBest >= niceLenLimit) {
    120                 skip(niceLenLimit, currentMatch);
    121                 return matches;
    122             }
    123         }
    124 
    125         // Long enough match wasn't found so easily. Look for better matches
    126         // from the binary tree.
    127         if (lenBest < 3)
    128             lenBest = 3;
    129 
    130         int depth = depthLimit;
    131 
    132         int ptr0 = (cyclicPos << 1) + 1;
    133         int ptr1 = cyclicPos << 1;
    134         int len0 = 0;
    135         int len1 = 0;
    136 
    137         while (true) {
    138             int delta = lzPos - currentMatch;
    139 
    140             // Return if the search depth limit has been reached or
    141             // if the distance of the potential match exceeds the
    142             // dictionary size.
    143             if (depth-- == 0 || delta >= cyclicSize) {
    144                 tree[ptr0] = 0;
    145                 tree[ptr1] = 0;
    146                 return matches;
    147             }
    148 
    149             int pair = (cyclicPos - delta
    150                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
    151             int len = Math.min(len0, len1);
    152 
    153             if (buf[readPos + len - delta] == buf[readPos + len]) {
    154                 while (++len < matchLenLimit)
    155                     if (buf[readPos + len - delta] != buf[readPos + len])
    156                         break;
    157 
    158                 if (len > lenBest) {
    159                     lenBest = len;
    160                     matches.len[matches.count] = len;
    161                     matches.dist[matches.count] = delta - 1;
    162                     ++matches.count;
    163 
    164                     if (len >= niceLenLimit) {
    165                         tree[ptr1] = tree[pair];
    166                         tree[ptr0] = tree[pair + 1];
    167                         return matches;
    168                     }
    169                 }
    170             }
    171 
    172             if ((buf[readPos + len - delta] & 0xFF)
    173                     < (buf[readPos + len] & 0xFF)) {
    174                 tree[ptr1] = currentMatch;
    175                 ptr1 = pair + 1;
    176                 currentMatch = tree[ptr1];
    177                 len1 = len;
    178             } else {
    179                 tree[ptr0] = currentMatch;
    180                 ptr0 = pair;
    181                 currentMatch = tree[ptr0];
    182                 len0 = len;
    183             }
    184         }
    185     }
    186 
    187     private void skip(int niceLenLimit, int currentMatch) {
    188         int depth = depthLimit;
    189 
    190         int ptr0 = (cyclicPos << 1) + 1;
    191         int ptr1 = cyclicPos << 1;
    192         int len0 = 0;
    193         int len1 = 0;
    194 
    195         while (true) {
    196             int delta = lzPos - currentMatch;
    197 
    198             if (depth-- == 0 || delta >= cyclicSize) {
    199                 tree[ptr0] = 0;
    200                 tree[ptr1] = 0;
    201                 return;
    202             }
    203 
    204             int pair = (cyclicPos - delta
    205                         + (delta > cyclicPos ? cyclicSize : 0)) << 1;
    206             int len = Math.min(len0, len1);
    207 
    208             if (buf[readPos + len - delta] == buf[readPos + len]) {
    209                 // No need to look for longer matches than niceLenLimit
    210                 // because we only are updating the tree, not returning
    211                 // matches found to the caller.
    212                 do {
    213                     if (++len == niceLenLimit) {
    214                         tree[ptr1] = tree[pair];
    215                         tree[ptr0] = tree[pair + 1];
    216                         return;
    217                     }
    218                 } while (buf[readPos + len - delta] == buf[readPos + len]);
    219             }
    220 
    221             if ((buf[readPos + len - delta] & 0xFF)
    222                     < (buf[readPos + len] & 0xFF)) {
    223                 tree[ptr1] = currentMatch;
    224                 ptr1 = pair + 1;
    225                 currentMatch = tree[ptr1];
    226                 len1 = len;
    227             } else {
    228                 tree[ptr0] = currentMatch;
    229                 ptr0 = pair;
    230                 currentMatch = tree[ptr0];
    231                 len0 = len;
    232             }
    233         }
    234     }
    235 
    236     public void skip(int len) {
    237         while (len-- > 0) {
    238             int niceLenLimit = niceLen;
    239             int avail = movePos();
    240 
    241             if (avail < niceLenLimit) {
    242                 if (avail == 0)
    243                     continue;
    244 
    245                 niceLenLimit = avail;
    246             }
    247 
    248             hash.calcHashes(buf, readPos);
    249             int currentMatch = hash.getHash4Pos();
    250             hash.updateTables(lzPos);
    251 
    252             skip(niceLenLimit, currentMatch);
    253         }
    254     }
    255 }
    256